Summary of the cohort and data exploration

##     study_id         Enumeration_date     Censored_Date           Censored     
##  Min.   :1.001e+09   Min.   :2017-01-03   Min.   :2017-01-12   Min.   :0.0000  
##  1st Qu.:3.194e+09   1st Qu.:2017-08-30   1st Qu.:2021-03-31   1st Qu.:0.0000  
##  Median :5.430e+09   Median :2018-07-30   Median :2021-03-31   Median :0.0000  
##  Mean   :5.441e+09   Mean   :2018-09-21   Mean   :2021-01-24   Mean   :0.1011  
##  3rd Qu.:7.664e+09   3rd Qu.:2019-08-22   3rd Qu.:2021-03-31   3rd Qu.:0.0000  
##  Max.   :9.998e+09   Max.   :2021-03-30   Max.   :2021-08-05   Max.   :1.0000  
##                                                                                
##      Linked            Time        Enumeration_CD4 Enumeration_Facility
##  Min.   :0.0000   Min.   :   0.0   Min.   :  0.0   Length:13344        
##  1st Qu.:1.0000   1st Qu.: 489.8   1st Qu.: 57.0   Class :character    
##  Median :1.0000   Median : 895.5   Median :111.0   Mode  :character    
##  Mean   :0.8357   Mean   : 856.1   Mean   :106.9                       
##  3rd Qu.:1.0000   3rd Qu.:1272.0   3rd Qu.:158.0                       
##  Max.   :1.0000   Max.   :1620.0   Max.   :199.0                       
##                                                                        
##    Latest_CD4    Enum_CD4_Cat       Enumeration_CD4_Cat   E_CD4_Cat     
##  Min.   :  0.0   Length:13344       Length:13344        Min.   :0.0000  
##  1st Qu.: 60.0   Class :character   Class :character    1st Qu.:0.0000  
##  Median :116.0   Mode  :character   Mode  :character    Median :0.0000  
##  Mean   :109.5                                          Mean   :0.6654  
##  3rd Qu.:162.0                                          3rd Qu.:1.0000  
##  Max.   :199.0                                          Max.   :2.0000  
##                                                                         
##  Hospital_Enumerations    VL_Enum           VL_Latest         VL_E_cat        
##  Min.   :0.0000        Min.   :       0   Min.   :      0   Length:13344      
##  1st Qu.:0.0000        1st Qu.:       0   1st Qu.:      0   Class :character  
##  Median :0.0000        Median :    4420   Median :      0   Mode  :character  
##  Mean   :0.1546        Mean   :  157778   Mean   :  53363                     
##  3rd Qu.:0.0000        3rd Qu.:   79222   3rd Qu.:    858                     
##  Max.   :1.0000        Max.   :48267460   Max.   :8484391                     
##                        NA's   :9310       NA's   :3869                        
##     VL_E_Cat     VL_E_Not_Done      VL_L_cat            VL_L_Cat    
##  Min.   :0.000   Min.   :0.0000   Length:13344       Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.0000   Class :character   1st Qu.:0.000  
##  Median :2.000   Median :1.0000   Mode  :character   Median :1.000  
##  Mean   :1.243   Mean   :0.6977                      Mean   :1.294  
##  3rd Qu.:2.000   3rd Qu.:1.0000                      3rd Qu.:3.000  
##  Max.   :2.000   Max.   :1.0000                      Max.   :3.000  
##  NA's   :9310                                                       
##  Incident_Crypto    Blood_Pos_I     Reflex_Pos_I   Prevalent_Crypto 
##  Min.   :0.00000   Min.   :0.00    Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:1.00    1st Qu.:1.000   1st Qu.:0.00000  
##  Median :0.00000   Median :1.00    Median :1.000   Median :0.00000  
##  Mean   :0.01589   Mean   :0.92    Mean   :0.783   Mean   :0.01686  
##  3rd Qu.:0.00000   3rd Qu.:1.00    3rd Qu.:1.000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00    Max.   :1.000   Max.   :1.00000  
##                    NA's   :13132   NA's   :13132                    
##   Blood_Pos_P     Reflex_Pos_P   Previous_Crypto    Blood_Pos_Prev 
##  Min.   :0.000   Min.   :0.000   Min.   :0.000000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.000000   1st Qu.:0.000  
##  Median :1.000   Median :1.000   Median :0.000000   Median :1.000  
##  Mean   :0.947   Mean   :0.889   Mean   :0.008918   Mean   :0.672  
##  3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:0.000000   3rd Qu.:1.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000000   Max.   :1.000  
##  NA's   :13119   NA's   :13119                      NA's   :13225  
##  Reflex_Pos_Prev Reflex_LFA_Done  Reflex_LFA_Positive first_ART_date      
##  Min.   :0.000   Min.   :0.0000   Min.   :1           Min.   :2000-01-03  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:1           1st Qu.:2012-10-16  
##  Median :0.000   Median :1.0000   Median :1           Median :2016-06-15  
##  Mean   :0.244   Mean   :0.5127   Mean   :1           Mean   :2015-06-18  
##  3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:1           3rd Qu.:2018-08-10  
##  Max.   :1.000   Max.   :1.0000   Max.   :1           Max.   :2021-03-30  
##  NA's   :13225                    NA's   :13015       NA's   :249         
##  recent_ART       ART_Exp_C         Subs_Initiated  Date_First_Initiated
##  Mode :logical   Length:13344       Min.   :0.000   Min.   :2017-01-03  
##  FALSE:10713     Class :character   1st Qu.:1.000   1st Qu.:2017-11-14  
##  TRUE :2631      Mode  :character   Median :1.000   Median :2018-11-13  
##                                     Mean   :0.953   Mean   :2018-11-26  
##                                     3rd Qu.:1.000   3rd Qu.:2019-10-09  
##                                     Max.   :1.000   Max.   :2021-03-30  
##                                     NA's   :7659    NA's   :7925        
##  Time_To_First_ART  Reinitiated    Date_Reinitiated     Time_Reinitiation
##  Min.   :   0.00   Min.   :0.000   Min.   :2017-01-03   Min.   :   0.0   
##  1st Qu.:   0.00   1st Qu.:1.000   1st Qu.:2017-11-09   1st Qu.:   0.0   
##  Median :  10.00   Median :1.000   Median :2018-10-08   Median :   7.0   
##  Mean   :  49.62   Mean   :0.919   Mean   :2018-11-14   Mean   :  39.4   
##  3rd Qu.:  28.00   3rd Qu.:1.000   3rd Qu.:2019-10-04   3rd Qu.:  25.0   
##  Max.   :1503.00   Max.   :1.000   Max.   :2021-03-31   Max.   :1287.0   
##  NA's   :7925      NA's   :8064    NA's   :8491         NA's   :8491     
##       DOB                  Sex             Age           AgeCat         
##  Min.   :1929-07-06   Min.   :1.000   Min.   :18.01   Length:13344      
##  1st Qu.:1975-12-27   1st Qu.:1.000   1st Qu.:30.34   Class :character  
##  Median :1982-11-17   Median :2.000   Median :35.78   Mode  :character  
##  Mean   :1981-08-22   Mean   :1.571   Mean   :37.08                     
##  3rd Qu.:1988-06-04   3rd Qu.:2.000   3rd Qu.:42.70                     
##  Max.   :2002-09-28   Max.   :2.000   Max.   :89.25                     
##                                                                         
##     Age_Cat         Age_cat              Sex_B        NPR_Vital_Status
##  Min.   :0.0000   Length:13344       Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   Class :character   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Mode  :character   Median :0.0000   Median :0.0000  
##  Mean   :0.3375                      Mean   :0.4293   Mean   :0.1021  
##  3rd Qu.:1.0000                      3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000                      Max.   :1.0000   Max.   :1.0000  
##                                                                       
##     NPR_Date                      Current_Diabetes  Current_Hypertension
##  Min.   :2017-01-12 00:00:00.00   Min.   :0.00000   Min.   :0.00000     
##  1st Qu.:2018-06-23 12:00:00.00   1st Qu.:0.00000   1st Qu.:0.00000     
##  Median :2019-07-10 00:00:00.00   Median :0.00000   Median :0.00000     
##  Mean   :2019-06-22 15:16:39.11   Mean   :0.02608   Mean   :0.08933     
##  3rd Qu.:2020-06-29 00:00:00.00   3rd Qu.:0.00000   3rd Qu.:0.00000     
##  Max.   :2021-08-05 00:00:00.00   Max.   :1.00000   Max.   :1.00000     
##  NA's   :11982                                                          
##   Current_CKD       Current_AKI      Previous_AKI     Incident_AKI   
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.01963   Mean   :0.1212   Mean   :0.1318   Mean   :0.1418  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                      
##  Current_TB_PHDC  Previous_TB_PHDC Incident_TB_PHDC
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2548   Mean   :0.3293   Mean   :0.1658  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
## 

Make changes to data structure and labels

# Changes to Data
# Coerce the enumeration CD4 count to numeric before anything reads it.
Cohort[["Enumeration_CD4"]] <- as.numeric(Cohort[["Enumeration_CD4"]])

# Working copy of the cohort restricted to the analysis variables.
Data <- select(
  Cohort,
  # identifiers and follow-up
  study_id, Censored, Time,
  # enumeration measurements
  Enumeration_CD4, VL_E_Cat, VL_E_Not_Done, E_CD4_Cat, Hospital_Enumerations,
  # cryptococcosis flags
  Incident_Crypto, Prevalent_Crypto, Previous_Crypto,
  # ART exposure and (re-)initiation
  ART_Exp_C, Subs_Initiated, Time_To_First_ART, Reinitiated, Time_Reinitiation,
  # demographics and linkage
  Sex_B, Age, Age_Cat, Linked,
  # comorbidities
  Current_Diabetes, Current_Hypertension, Current_CKD,
  # tuberculosis flags
  Current_TB_PHDC, Previous_TB_PHDC, Incident_TB_PHDC
)

# Add display-ready copies of the analysis variables to Data.
# Coded variables become labelled factors; the remaining columns are copied
# under human-readable names for use in tables.
#
# Every factor() call states its `levels` explicitly. With `labels` alone,
# factor() takes the levels from the values that happen to be present, so a
# code missing from the data would either raise a length-mismatch error or
# attach the labels to the wrong codes.
Data <- within(Data, {
  `Viral Load at Enumeration` <- factor(
    VL_E_Cat,
    levels = c(0, 1, 2),
    labels = c("< 100", "100 -1000", "> 1000")
  )
  `Enumeration VL Not Done` <- VL_E_Not_Done
  ART <- factor(
    ART_Exp_C,
    levels = c("On ART", "LTFU", "Naive"),
    labels = c("On ART", "LTFU", "ART Naive")
  )
  LTFU <- factor(
    Reinitiated,
    levels = c(0, 1),
    labels = c("Never Re-initiated", "Re-initiated")
  )
  # NOTE(review): "Initiaited" is misspelt. The label is left unchanged here
  # because it is a runtime value that other code may match on.
  `ART Naive` <- factor(
    Subs_Initiated,
    levels = c(0, 1),
    labels = c("Never Initiaited", "Subsequently Initiated")
  )
  `Enumeration CD4` <- Enumeration_CD4
  # Codes run from the highest CD4 band (0) to the lowest (2).
  `Enumeration CD4 Categorical` <- factor(
    E_CD4_Cat,
    levels = c(0, 1, 2),
    labels = c("101 - 199", "51 - 100", "0 - 50")
  )
  `Time in Cohort` <- Time
  `Time to Initiation` <- Time_To_First_ART
  `Time to Re-initiation` <- Time_Reinitiation
  Mortality <- factor(Censored, levels = c(0, 1), labels = c("Survived", "Died"))
  Sex <- factor(Sex_B, levels = c(0, 1), labels = c("Female", "Male"))
  `Sex (Male)` <- Sex_B
  `Age > 40` <- Age_Cat
  `Age` <- Age
  `Incident Cryptococcosis` <- Incident_Crypto
  `Current Cryptococcosis` <- Prevalent_Crypto
  `Previous Cryptococcosis` <- Previous_Crypto
  `Enumerated in Hospital` <- Hospital_Enumerations
  `NPR Linkage` <- factor(Linked, levels = c(0, 1), labels = c("Unlinked", "Linked"))
  `Diabetic` <- Current_Diabetes
  `Hypertension` <- Current_Hypertension
  `Chronic Kidney Disease` <- Current_CKD
  `Current TB (PHDC)` <- Current_TB_PHDC
  `Previous TB (PHDC)` <- Previous_TB_PHDC
  `Incident TB (PHDC)` <- Incident_TB_PHDC
})

# Keep the labelled columns (plus the raw Time, Censored and VL_E_Cat codes)
# in presentation order. Keep the column count and order stable if any
# downstream code indexes columns by position.
Data <- select(
  Data,
  study_id,
  Time,
  `Viral Load at Enumeration`,
  `Enumeration VL Not Done`,
  `Enumeration CD4`,
  `Enumerated in Hospital`,
  `Enumeration CD4 Categorical`,
  `NPR Linkage`,
  Mortality,
  Sex,
  `Sex (Male)`,
  `Age > 40`,
  Age,
  `Incident TB (PHDC)`,
  `Previous TB (PHDC)`,
  `Current TB (PHDC)`,
  `Previous Cryptococcosis`,
  `Current Cryptococcosis`,
  `Incident Cryptococcosis`,
  `Chronic Kidney Disease`,
  Hypertension,
  Diabetic,
  `ART Naive`,
  LTFU,
  ART,
  `Time to Initiation`,
  `Time to Re-initiation`,
  Censored,
  VL_E_Cat
)

# List the retained column names.
colnames(Data)
##  [1] "study_id"                    "Time"                       
##  [3] "Viral Load at Enumeration"   "Enumeration VL Not Done"    
##  [5] "Enumeration CD4"             "Enumerated in Hospital"     
##  [7] "Enumeration CD4 Categorical" "NPR Linkage"                
##  [9] "Mortality"                   "Sex"                        
## [11] "Sex (Male)"                  "Age > 40"                   
## [13] "Age"                         "Incident TB (PHDC)"         
## [15] "Previous TB (PHDC)"          "Current TB (PHDC)"          
## [17] "Previous Cryptococcosis"     "Current Cryptococcosis"     
## [19] "Incident Cryptococcosis"     "Chronic Kidney Disease"     
## [21] "Hypertension"                "Diabetic"                   
## [23] "ART Naive"                   "LTFU"                       
## [25] "ART"                         "Time to Initiation"         
## [27] "Time to Re-initiation"       "Censored"                   
## [29] "VL_E_Cat"
# Band age into four right-closed intervals: (0,25], (25,35], (35,45], (45,100].
Data[["AgeCat_4"]] <- cut(Data[["Age"]], breaks = c(0, 25, 35, 45, 100))
summary(Data[["AgeCat_4"]])
##   (0,25]  (25,35]  (35,45] (45,100] 
##     1031     5154     4637     2522
# Numeric code for the age band: 0 = (0,25], 1 = (25,35], 2 = (35,45],
# 3 = (45,100]. Ages outside every band stay NA.
Data$Age_Cat_4 <- match(
  as.character(Data$AgeCat_4),
  c("(0,25]", "(25,35]", "(35,45]", "(45,100]")
) - 1
table(Data$Age_Cat_4)
## 
##    0    1    2    3 
## 1031 5154 4637 2522
# Turn the banded age factor into the labelled `Age_cat` column.
# The column is renamed by name rather than by position, so the rename still
# hits the age-band factor if columns are added to or removed from Data.
names(Data)[names(Data) == "AgeCat_4"] <- "Age_cat"

# Relabel the four interval levels. Passing `levels` keeps all four bands
# even if one of them happens to be empty, so the labels cannot shift.
Data$Age_cat <- factor(
  Data$Age_cat,
  levels = levels(Data$Age_cat),
  labels = c("18-25", "25-35", "35-45", ">45")
)

# Make 25-35 the reference (first) level.
Data$Age_cat <- relevel(Data$Age_cat, ref = "25-35")
summary(Data$Age_cat)
## 25-35 18-25 35-45   >45 
##  5154  1031  4637  2522

Plot by mortality

# Plotting data: records with Linked > 0, plus a labelled mortality factor
# built from the Censored indicator.
Mort_Plots <- subset(Cohort, Linked > 0)
Mort_Plots$Mortality <- factor(
  Mort_Plots$Censored,
  labels = c("Survived", "Died")
)

# Density plot of one Mort_Plots column, coloured and filled by mortality
# status, with a median line added per group.
plot_density_by_mortality <- function(variable) {
  ggdensity(
    Mort_Plots,
    x = variable,
    add = "median",
    color = "Mortality",
    fill = "Mortality",
    palette = c("#00AFBB", "#E7B800")
  )
}

plot_density_by_mortality("Enumeration_CD4")

plot_density_by_mortality("Time")

plot_density_by_mortality("Time_To_First_ART")
## Warning: Removed 6627 rows containing non-finite outside the scale range
## (`stat_density()`).

plot_density_by_mortality("Time_Reinitiation")
## Warning: Removed 7035 rows containing non-finite outside the scale range
## (`stat_density()`).

plot_density_by_mortality("Age")

Plot by ART

Create stratified summary tables

## Table_1
library(tableone)
# Unrestricted overview: every column of Data, summarised with tableone's
# defaults (no variable list, no factor overrides).
print(CreateTableOne(data = Data))
##                                         
##                                          Overall                      
##   n                                              13344                
##   study_id (mean (SD))                   5441175670.67 (2587845360.28)
##   Time (mean (SD))                              856.07 (463.40)       
##   Viral Load at Enumeration (%)                                       
##      < 100                                        1311 (32.5)         
##      100 -1000                                     431 (10.7)         
##      > 1000                                       2292 (56.8)         
##   Enumeration VL Not Done (mean (SD))             0.70 (0.46)         
##   Enumeration CD4 (mean (SD))                   106.89 (57.78)        
##   Enumerated in Hospital (mean (SD))              0.15 (0.36)         
##   Enumeration CD4 Categorical (%)                                     
##      101 - 199                                    7368 (55.2)         
##      51 - 100                                     3073 (23.0)         
##      0 - 50                                       2903 (21.8)         
##   NPR Linkage = Linked (%)                       11152 (83.6)         
##   Mortality = Died (%)                            1349 (10.1)         
##   Sex = Male (%)                                  5728 (42.9)         
##   Sex (Male) (mean (SD))                          0.43 (0.49)         
##   Age > 40 (mean (SD))                            0.34 (0.47)         
##   Age (mean (SD))                                37.08 (9.45)         
##   Incident TB (PHDC) (mean (SD))                  0.17 (0.37)         
##   Previous TB (PHDC) (mean (SD))                  0.33 (0.47)         
##   Current TB (PHDC) (mean (SD))                   0.25 (0.44)         
##   Previous Cryptococcosis (mean (SD))             0.01 (0.09)         
##   Current Cryptococcosis (mean (SD))              0.02 (0.13)         
##   Incident Cryptococcosis (mean (SD))             0.02 (0.13)         
##   Chronic Kidney Disease (mean (SD))              0.02 (0.14)         
##   Hypertension (mean (SD))                        0.09 (0.29)         
##   Diabetic (mean (SD))                            0.03 (0.16)         
##   ART Naive = Subsequently Initiated (%)          5419 (95.3)         
##   LTFU = Re-initiated (%)                         4853 (91.9)         
##   ART (%)                                                             
##      On ART                                       2379 (17.8)         
##      LTFU                                         5280 (39.6)         
##      ART Naive                                    5685 (42.6)         
##   Time to Initiation (mean (SD))                 49.62 (140.82)       
##   Time to Re-initiation (mean (SD))              39.41 (110.89)       
##   Censored (mean (SD))                            0.10 (0.30)         
##   VL_E_Cat (mean (SD))                            1.24 (0.91)         
##   Age_cat (%)                                                         
##      25-35                                        5154 (38.6)         
##      18-25                                        1031 ( 7.7)         
##      35-45                                        4637 (34.7)         
##      >45                                          2522 (18.9)         
##   Age_Cat_4 (mean (SD))                           1.65 (0.87)
# "Time in Cohort" is requested in myVars and non_normal below, but Data only
# carries the raw `Time` column at this point. CreateTableOne drops requested
# variables that are absent from the data, so the row would be missing from
# every table. Recreate the display column from `Time` when it is absent.
if (!"Time in Cohort" %in% names(Data)) {
  Data[["Time in Cohort"]] <- Data[["Time"]]
}

# Variables reported in the descriptive tables, in display order.
myVars <- c(
  "Mortality", "NPR Linkage", "Sex", "Age > 40", "Age",
  "Enumeration CD4", "Enumeration CD4 Categorical",
  "Time in Cohort",
  "Viral Load at Enumeration", "Enumeration VL Not Done",
  "ART", "ART Naive", "Time to Initiation", "LTFU", "Time to Re-initiation",
  "Enumerated in Hospital",
  "Current TB (PHDC)", "Previous TB (PHDC)", "Incident TB (PHDC)",
  "Current Cryptococcosis", "Previous Cryptococcosis",
  "Incident Cryptococcosis", "Age_cat"
)

# Subset of myVars summarised as counts and percentages; this includes the
# 0/1 indicator columns that are stored as numbers.
catVars <- c(
  "Mortality", "NPR Linkage", "Sex", "Age > 40", "Enumeration CD4 Categorical",
  "Viral Load at Enumeration", "Enumeration VL Not Done",
  "Enumerated in Hospital",
  "Current TB (PHDC)", "Previous TB (PHDC)", "Incident TB (PHDC)",
  "ART", "ART Naive", "LTFU",
  "Previous Cryptococcosis", "Current Cryptococcosis",
  "Incident Cryptococcosis", "Age_cat"
)

# Overall (unstratified) table.
tab2 <- CreateTableOne(vars = myVars, data = Data, factorVars = catVars)

# Add non-normality: these continuous variables are printed as median [IQR].
non_normal <- c(
  "Age", "Time to Re-initiation", "Time to Initiation",
  "Time in Cohort", "Enumeration CD4"
)
print(tab2, nonnormal = non_normal, quote = TRUE, noSpaces = TRUE)
##                                           ""
##  ""                                        "Overall"               
##   "n"                                      "13344"                 
##   "Mortality = Died (%)"                   "1349 (10.1)"           
##   "NPR Linkage = Linked (%)"               "11152 (83.6)"          
##   "Sex = Male (%)"                         "5728 (42.9)"           
##   "Age > 40 = 1 (%)"                       "4503 (33.7)"           
##   "Age (median [IQR])"                     "35.78 [30.34, 42.70]"  
##   "Enumeration CD4 (median [IQR])"         "111.00 [57.00, 158.00]"
##   "Enumeration CD4 Categorical (%)"        ""                      
##   "   101 - 199"                           "7368 (55.2)"           
##   "   51 - 100"                            "3073 (23.0)"           
##   "   0 - 50"                              "2903 (21.8)"           
##   "Viral Load at Enumeration (%)"          ""                      
##   "   < 100"                               "1311 (32.5)"           
##   "   100 -1000"                           "431 (10.7)"            
##   "   > 1000"                              "2292 (56.8)"           
##   "Enumeration VL Not Done = 1 (%)"        "9310 (69.8)"           
##   "ART (%)"                                ""                      
##   "   On ART"                              "2379 (17.8)"           
##   "   LTFU"                                "5280 (39.6)"           
##   "   ART Naive"                           "5685 (42.6)"           
##   "ART Naive = Subsequently Initiated (%)" "5419 (95.3)"           
##   "Time to Initiation (median [IQR])"      "10.00 [0.00, 28.00]"   
##   "LTFU = Re-initiated (%)"                "4853 (91.9)"           
##   "Time to Re-initiation (median [IQR])"   "7.00 [0.00, 25.00]"    
##   "Enumerated in Hospital = 1 (%)"         "2063 (15.5)"           
##   "Current TB (PHDC) = 1 (%)"              "3400 (25.5)"           
##   "Previous TB (PHDC) = 1 (%)"             "4394 (32.9)"           
##   "Incident TB (PHDC) = 1 (%)"             "2212 (16.6)"           
##   "Current Cryptococcosis = 1 (%)"         "225 (1.7)"             
##   "Previous Cryptococcosis = 1 (%)"        "119 (0.9)"             
##   "Incident Cryptococcosis = 1 (%)"        "212 (1.6)"             
##   "Age_cat (%)"                            ""                      
##   "   25-35"                               "5154 (38.6)"           
##   "   18-25"                               "1031 (7.7)"            
##   "   35-45"                               "4637 (34.7)"           
##   "   >45"                                 "2522 (18.9)"
# Linked vs unlinked
# Same variable list as the overall table, split by NPR linkage status.
tab_linked <- CreateTableOne(
  vars = myVars,
  strata = "NPR Linkage",
  data = Data,
  factorVars = catVars
)
print(tab_linked, nonnormal = non_normal, quote = TRUE, noSpaces = TRUE)
##                                           "Stratified by NPR Linkage"
##  ""                                        "Unlinked"              
##   "n"                                      "2192"                  
##   "Mortality = Died (%)"                   "0 (0.0)"               
##   "NPR Linkage = Linked (%)"               "0 (0.0)"               
##   "Sex = Male (%)"                         "1024 (46.7)"           
##   "Age > 40 = 1 (%)"                       "863 (39.4)"            
##   "Age (median [IQR])"                     "37.24 [31.51, 44.35]"  
##   "Enumeration CD4 (median [IQR])"         "107.00 [56.00, 157.25]"
##   "Enumeration CD4 Categorical (%)"        ""                      
##   "   101 - 199"                           "1176 (53.6)"           
##   "   51 - 100"                            "520 (23.7)"            
##   "   0 - 50"                              "496 (22.6)"            
##   "Viral Load at Enumeration (%)"          ""                      
##   "   < 100"                               "211 (29.6)"            
##   "   100 -1000"                           "66 (9.2)"              
##   "   > 1000"                              "437 (61.2)"            
##   "Enumeration VL Not Done = 1 (%)"        "1478 (67.4)"           
##   "ART (%)"                                ""                      
##   "   On ART"                              "422 (19.3)"            
##   "   LTFU"                                "813 (37.1)"            
##   "   ART Naive"                           "957 (43.7)"            
##   "ART Naive = Subsequently Initiated (%)" "894 (93.4)"            
##   "Time to Initiation (median [IQR])"      "12.00 [0.25, 27.00]"   
##   "LTFU = Re-initiated (%)"                "736 (90.5)"            
##   "Time to Re-initiation (median [IQR])"   "5.00 [0.00, 21.00]"    
##   "Enumerated in Hospital = 1 (%)"         "393 (17.9)"            
##   "Current TB (PHDC) = 1 (%)"              "611 (27.9)"            
##   "Previous TB (PHDC) = 1 (%)"             "831 (37.9)"            
##   "Incident TB (PHDC) = 1 (%)"             "351 (16.0)"            
##   "Current Cryptococcosis = 1 (%)"         "40 (1.8)"              
##   "Previous Cryptococcosis = 1 (%)"        "27 (1.2)"              
##   "Incident Cryptococcosis = 1 (%)"        "36 (1.6)"              
##   "Age_cat (%)"                            ""                      
##   "   25-35"                               "753 (34.4)"            
##   "   18-25"                               "136 (6.2)"             
##   "   35-45"                               "806 (36.8)"            
##   "   >45"                                 "497 (22.7)"            
##                                           "Stratified by NPR Linkage"
##  ""                                        "Linked"                 "p"     
##   "n"                                      "11152"                  ""      
##   "Mortality = Died (%)"                   "1349 (12.1)"            "<0.001"
##   "NPR Linkage = Linked (%)"               "11152 (100.0)"          "<0.001"
##   "Sex = Male (%)"                         "4704 (42.2)"            "<0.001"
##   "Age > 40 = 1 (%)"                       "3640 (32.6)"            "<0.001"
##   "Age (median [IQR])"                     "35.54 [30.16, 42.36]"   "<0.001"
##   "Enumeration CD4 (median [IQR])"         "111.00 [58.00, 158.00]" "0.400" 
##   "Enumeration CD4 Categorical (%)"        ""                       "0.268" 
##   "   101 - 199"                           "6192 (55.5)"            ""      
##   "   51 - 100"                            "2553 (22.9)"            ""      
##   "   0 - 50"                              "2407 (21.6)"            ""      
##   "Viral Load at Enumeration (%)"          ""                       "0.031" 
##   "   < 100"                               "1100 (33.1)"            ""      
##   "   100 -1000"                           "365 (11.0)"             ""      
##   "   > 1000"                              "1855 (55.9)"            ""      
##   "Enumeration VL Not Done = 1 (%)"        "7832 (70.2)"            "0.010" 
##   "ART (%)"                                ""                       "0.021" 
##   "   On ART"                              "1957 (17.5)"            ""      
##   "   LTFU"                                "4467 (40.1)"            ""      
##   "   ART Naive"                           "4728 (42.4)"            ""      
##   "ART Naive = Subsequently Initiated (%)" "4525 (95.7)"            "0.003" 
##   "Time to Initiation (median [IQR])"      "10.00 [0.00, 28.00]"    "0.398" 
##   "LTFU = Re-initiated (%)"                "4117 (92.2)"            "0.133" 
##   "Time to Re-initiation (median [IQR])"   "7.00 [0.00, 26.00]"     "0.033" 
##   "Enumerated in Hospital = 1 (%)"         "1670 (15.0)"            "0.001" 
##   "Current TB (PHDC) = 1 (%)"              "2789 (25.0)"            "0.005" 
##   "Previous TB (PHDC) = 1 (%)"             "3563 (31.9)"            "<0.001"
##   "Incident TB (PHDC) = 1 (%)"             "1861 (16.7)"            "0.456" 
##   "Current Cryptococcosis = 1 (%)"         "185 (1.7)"              "0.645" 
##   "Previous Cryptococcosis = 1 (%)"        "92 (0.8)"               "0.084" 
##   "Incident Cryptococcosis = 1 (%)"        "176 (1.6)"              "0.900" 
##   "Age_cat (%)"                            ""                       "<0.001"
##   "   25-35"                               "4401 (39.5)"            ""      
##   "   18-25"                               "895 (8.0)"              ""      
##   "   35-45"                               "3831 (34.4)"            ""      
##   "   >45"                                 "2025 (18.2)"            ""      
##                                           "Stratified by NPR Linkage"
##  ""                                        "test"   
##   "n"                                      ""       
##   "Mortality = Died (%)"                   ""       
##   "NPR Linkage = Linked (%)"               ""       
##   "Sex = Male (%)"                         ""       
##   "Age > 40 = 1 (%)"                       ""       
##   "Age (median [IQR])"                     "nonnorm"
##   "Enumeration CD4 (median [IQR])"         "nonnorm"
##   "Enumeration CD4 Categorical (%)"        ""       
##   "   101 - 199"                           ""       
##   "   51 - 100"                            ""       
##   "   0 - 50"                              ""       
##   "Viral Load at Enumeration (%)"          ""       
##   "   < 100"                               ""       
##   "   100 -1000"                           ""       
##   "   > 1000"                              ""       
##   "Enumeration VL Not Done = 1 (%)"        ""       
##   "ART (%)"                                ""       
##   "   On ART"                              ""       
##   "   LTFU"                                ""       
##   "   ART Naive"                           ""       
##   "ART Naive = Subsequently Initiated (%)" ""       
##   "Time to Initiation (median [IQR])"      "nonnorm"
##   "LTFU = Re-initiated (%)"                ""       
##   "Time to Re-initiation (median [IQR])"   "nonnorm"
##   "Enumerated in Hospital = 1 (%)"         ""       
##   "Current TB (PHDC) = 1 (%)"              ""       
##   "Previous TB (PHDC) = 1 (%)"             ""       
##   "Incident TB (PHDC) = 1 (%)"             ""       
##   "Current Cryptococcosis = 1 (%)"         ""       
##   "Previous Cryptococcosis = 1 (%)"        ""       
##   "Incident Cryptococcosis = 1 (%)"        ""       
##   "Age_cat (%)"                            ""       
##   "   25-35"                               ""       
##   "   18-25"                               ""       
##   "   35-45"                               ""       
##   "   >45"                                 ""
# Stratified (Mortality status)
# Keep only the rows whose `NPR Linkage` equals "Linked"; which() discards
# rows where the comparison is NA, matching what subset() does.
Linked_Data <- Data[which(Data[["NPR Linkage"]] == "Linked"), ]
# Summarise myVars by mortality status, treating catVars as categorical.
tab3 <- CreateTableOne(
  data = Linked_Data,
  strata = "Mortality",
  vars = myVars,
  factorVars = catVars
)
# Variables named in non_normal are reported as median [IQR]; cells are
# quoted and printed without padding spaces.
print(
  tab3,
  quote = TRUE,
  noSpaces = TRUE,
  nonnormal = non_normal
)
##                                           "Stratified by Mortality"
##  ""                                        "Survived"              
##   "n"                                      "9803"                  
##   "Mortality = Died (%)"                   "0 (0.0)"               
##   "NPR Linkage = Linked (%)"               "9803 (100.0)"          
##   "Sex = Male (%)"                         "4069 (41.5)"           
##   "Age > 40 = 1 (%)"                       "3080 (31.4)"           
##   "Age (median [IQR])"                     "35.27 [29.96, 41.86]"  
##   "Enumeration CD4 (median [IQR])"         "117.00 [64.00, 161.00]"
##   "Enumeration CD4 Categorical (%)"        ""                      
##   "   101 - 199"                           "5692 (58.1)"           
##   "   51 - 100"                            "2225 (22.7)"           
##   "   0 - 50"                              "1886 (19.2)"           
##   "Viral Load at Enumeration (%)"          ""                      
##   "   < 100"                               "947 (34.3)"            
##   "   100 -1000"                           "318 (11.5)"            
##   "   > 1000"                              "1493 (54.1)"           
##   "Enumeration VL Not Done = 1 (%)"        "7045 (71.9)"           
##   "ART (%)"                                ""                      
##   "   On ART"                              "1611 (16.4)"           
##   "   LTFU"                                "3859 (39.4)"           
##   "   ART Naive"                           "4333 (44.2)"           
##   "ART Naive = Subsequently Initiated (%)" "4181 (96.5)"           
##   "Time to Initiation (median [IQR])"      "9.00 [0.00, 28.00]"    
##   "LTFU = Re-initiated (%)"                "3617 (93.7)"           
##   "Time to Re-initiation (median [IQR])"   "6.00 [0.00, 25.00]"    
##   "Enumerated in Hospital = 1 (%)"         "1248 (12.7)"           
##   "Current TB (PHDC) = 1 (%)"              "2295 (23.4)"           
##   "Previous TB (PHDC) = 1 (%)"             "2902 (29.6)"           
##   "Incident TB (PHDC) = 1 (%)"             "1533 (15.6)"           
##   "Current Cryptococcosis = 1 (%)"         "135 (1.4)"             
##   "Previous Cryptococcosis = 1 (%)"        "75 (0.8)"              
##   "Incident Cryptococcosis = 1 (%)"        "126 (1.3)"             
##   "Age_cat (%)"                            ""                      
##   "   25-35"                               "3976 (40.6)"           
##   "   18-25"                               "805 (8.2)"             
##   "   35-45"                               "3350 (34.2)"           
##   "   >45"                                 "1672 (17.1)"           
##                                           "Stratified by Mortality"
##  ""                                        "Died"                  "p"     
##   "n"                                      "1349"                  ""      
##   "Mortality = Died (%)"                   "1349 (100.0)"          "<0.001"
##   "NPR Linkage = Linked (%)"               "1349 (100.0)"          "NA"    
##   "Sex = Male (%)"                         "635 (47.1)"            "<0.001"
##   "Age > 40 = 1 (%)"                       "560 (41.5)"            "<0.001"
##   "Age (median [IQR])"                     "37.70 [32.03, 45.39]"  "<0.001"
##   "Enumeration CD4 (median [IQR])"         "69.00 [30.00, 128.00]" "<0.001"
##   "Enumeration CD4 Categorical (%)"        ""                      "<0.001"
##   "   101 - 199"                           "500 (37.1)"            ""      
##   "   51 - 100"                            "328 (24.3)"            ""      
##   "   0 - 50"                              "521 (38.6)"            ""      
##   "Viral Load at Enumeration (%)"          ""                      "<0.001"
##   "   < 100"                               "153 (27.2)"            ""      
##   "   100 -1000"                           "47 (8.4)"              ""      
##   "   > 1000"                              "362 (64.4)"            ""      
##   "Enumeration VL Not Done = 1 (%)"        "787 (58.3)"            "<0.001"
##   "ART (%)"                                ""                      "<0.001"
##   "   On ART"                              "346 (25.6)"            ""      
##   "   LTFU"                                "608 (45.1)"            ""      
##   "   ART Naive"                           "395 (29.3)"            ""      
##   "ART Naive = Subsequently Initiated (%)" "344 (87.1)"            "<0.001"
##   "Time to Initiation (median [IQR])"      "15.00 [4.00, 31.00]"   "<0.001"
##   "LTFU = Re-initiated (%)"                "500 (82.2)"            "<0.001"
##   "Time to Re-initiation (median [IQR])"   "8.00 [1.00, 27.00]"    "0.006" 
##   "Enumerated in Hospital = 1 (%)"         "422 (31.3)"            "<0.001"
##   "Current TB (PHDC) = 1 (%)"              "494 (36.6)"            "<0.001"
##   "Previous TB (PHDC) = 1 (%)"             "661 (49.0)"            "<0.001"
##   "Incident TB (PHDC) = 1 (%)"             "328 (24.3)"            "<0.001"
##   "Current Cryptococcosis = 1 (%)"         "50 (3.7)"              "<0.001"
##   "Previous Cryptococcosis = 1 (%)"        "17 (1.3)"              "0.085" 
##   "Incident Cryptococcosis = 1 (%)"        "50 (3.7)"              "<0.001"
##   "Age_cat (%)"                            ""                      "<0.001"
##   "   25-35"                               "425 (31.5)"            ""      
##   "   18-25"                               "90 (6.7)"              ""      
##   "   35-45"                               "481 (35.7)"            ""      
##   "   >45"                                 "353 (26.2)"            ""      
##                                           "Stratified by Mortality"
##  ""                                        "test"   
##   "n"                                      ""       
##   "Mortality = Died (%)"                   ""       
##   "NPR Linkage = Linked (%)"               ""       
##   "Sex = Male (%)"                         ""       
##   "Age > 40 = 1 (%)"                       ""       
##   "Age (median [IQR])"                     "nonnorm"
##   "Enumeration CD4 (median [IQR])"         "nonnorm"
##   "Enumeration CD4 Categorical (%)"        ""       
##   "   101 - 199"                           ""       
##   "   51 - 100"                            ""       
##   "   0 - 50"                              ""       
##   "Viral Load at Enumeration (%)"          ""       
##   "   < 100"                               ""       
##   "   100 -1000"                           ""       
##   "   > 1000"                              ""       
##   "Enumeration VL Not Done = 1 (%)"        ""       
##   "ART (%)"                                ""       
##   "   On ART"                              ""       
##   "   LTFU"                                ""       
##   "   ART Naive"                           ""       
##   "ART Naive = Subsequently Initiated (%)" ""       
##   "Time to Initiation (median [IQR])"      "nonnorm"
##   "LTFU = Re-initiated (%)"                ""       
##   "Time to Re-initiation (median [IQR])"   "nonnorm"
##   "Enumerated in Hospital = 1 (%)"         ""       
##   "Current TB (PHDC) = 1 (%)"              ""       
##   "Previous TB (PHDC) = 1 (%)"             ""       
##   "Incident TB (PHDC) = 1 (%)"             ""       
##   "Current Cryptococcosis = 1 (%)"         ""       
##   "Previous Cryptococcosis = 1 (%)"        ""       
##   "Incident Cryptococcosis = 1 (%)"        ""       
##   "Age_cat (%)"                            ""       
##   "   25-35"                               ""       
##   "   18-25"                               ""       
##   "   35-45"                               ""       
##   "   >45"                                 ""
# Stratified table 2 (LTFU/Re-initiation status)
# Summarise myVars across the levels of the `LTFU` column of the full data
# set, treating catVars as categorical.
tab4 <- CreateTableOne(
  data = Data,
  strata = "LTFU",
  vars = myVars,
  factorVars = catVars
)
# Variables named in non_normal are reported as median [IQR]; cells are
# quoted and printed without padding spaces.
print(
  tab4,
  quote = TRUE,
  noSpaces = TRUE,
  nonnormal = non_normal
)
##                                           "Stratified by LTFU"
##  ""                                        "Never Re-initiated"   
##   "n"                                      "427"                  
##   "Mortality = Died (%)"                   "108 (25.3)"           
##   "NPR Linkage = Linked (%)"               "350 (82.0)"           
##   "Sex = Male (%)"                         "168 (39.3)"           
##   "Age > 40 = 1 (%)"                       "161 (37.7)"           
##   "Age (median [IQR])"                     "37.33 [31.12, 43.60]" 
##   "Enumeration CD4 (median [IQR])"         "95.00 [42.50, 157.00]"
##   "Enumeration CD4 Categorical (%)"        ""                     
##   "   101 - 199"                           "201 (47.1)"           
##   "   51 - 100"                            "104 (24.4)"           
##   "   0 - 50"                              "122 (28.6)"           
##   "Viral Load at Enumeration (%)"          ""                     
##   "   < 100"                               "24 (17.1)"            
##   "   100 -1000"                           "19 (13.6)"            
##   "   > 1000"                              "97 (69.3)"            
##   "Enumeration VL Not Done = 1 (%)"        "287 (67.2)"           
##   "ART (%)"                                ""                     
##   "   On ART"                              "0 (0.0)"              
##   "   LTFU"                                "427 (100.0)"          
##   "   ART Naive"                           "0 (0.0)"              
##   "ART Naive = Subsequently Initiated (%)" "0 (NaN)"              
##   "Time to Initiation (median [IQR])"      "NA [NA, NA]"          
##   "LTFU = Re-initiated (%)"                "0 (0.0)"              
##   "Time to Re-initiation (median [IQR])"   "NA [NA, NA]"          
##   "Enumerated in Hospital = 1 (%)"         "194 (45.4)"           
##   "Current TB (PHDC) = 1 (%)"              "121 (28.3)"           
##   "Previous TB (PHDC) = 1 (%)"             "213 (49.9)"           
##   "Incident TB (PHDC) = 1 (%)"             "35 (8.2)"             
##   "Current Cryptococcosis = 1 (%)"         "15 (3.5)"             
##   "Previous Cryptococcosis = 1 (%)"        "6 (1.4)"              
##   "Incident Cryptococcosis = 1 (%)"        "1 (0.2)"              
##   "Age_cat (%)"                            ""                     
##   "   25-35"                               "140 (32.8)"           
##   "   18-25"                               "31 (7.3)"             
##   "   35-45"                               "170 (39.8)"           
##   "   >45"                                 "86 (20.1)"            
##                                           "Stratified by LTFU"
##  ""                                        "Re-initiated"           "p"     
##   "n"                                      "4853"                   ""      
##   "Mortality = Died (%)"                   "500 (10.3)"             "<0.001"
##   "NPR Linkage = Linked (%)"               "4117 (84.8)"            "0.133" 
##   "Sex = Male (%)"                         "1820 (37.5)"            "0.483" 
##   "Age > 40 = 1 (%)"                       "1592 (32.8)"            "0.045" 
##   "Age (median [IQR])"                     "35.90 [30.69, 42.37]"   "0.049" 
##   "Enumeration CD4 (median [IQR])"         "107.00 [53.00, 155.00]" "0.086" 
##   "Enumeration CD4 Categorical (%)"        ""                       "0.026" 
##   "   101 - 199"                           "2583 (53.2)"            ""      
##   "   51 - 100"                            "1131 (23.3)"            ""      
##   "   0 - 50"                              "1139 (23.5)"            ""      
##   "Viral Load at Enumeration (%)"          ""                       "0.353" 
##   "   < 100"                               "304 (22.1)"             ""      
##   "   100 -1000"                           "158 (11.5)"             ""      
##   "   > 1000"                              "912 (66.4)"             ""      
##   "Enumeration VL Not Done = 1 (%)"        "3479 (71.7)"            "0.057" 
##   "ART (%)"                                ""                       "NaN"   
##   "   On ART"                              "0 (0.0)"                ""      
##   "   LTFU"                                "4853 (100.0)"           ""      
##   "   ART Naive"                           "0 (0.0)"                ""      
##   "ART Naive = Subsequently Initiated (%)" "0 (NaN)"                "NA"    
##   "Time to Initiation (median [IQR])"      "NA [NA, NA]"            "NA"    
##   "LTFU = Re-initiated (%)"                "4853 (100.0)"           "<0.001"
##   "Time to Re-initiation (median [IQR])"   "7.00 [0.00, 25.00]"     "NA"    
##   "Enumerated in Hospital = 1 (%)"         "661 (13.6)"             "<0.001"
##   "Current TB (PHDC) = 1 (%)"              "1157 (23.8)"            "0.043" 
##   "Previous TB (PHDC) = 1 (%)"             "2292 (47.2)"            "0.316" 
##   "Incident TB (PHDC) = 1 (%)"             "998 (20.6)"             "<0.001"
##   "Current Cryptococcosis = 1 (%)"         "77 (1.6)"               "0.006" 
##   "Previous Cryptococcosis = 1 (%)"        "46 (0.9)"               "0.508" 
##   "Incident Cryptococcosis = 1 (%)"        "102 (2.1)"              "0.013" 
##   "Age_cat (%)"                            ""                       "0.096" 
##   "   25-35"                               "1888 (38.9)"            ""      
##   "   18-25"                               "325 (6.7)"              ""      
##   "   35-45"                               "1781 (36.7)"            ""      
##   "   >45"                                 "859 (17.7)"             ""      
##                                           "Stratified by LTFU"
##  ""                                        "test"   
##   "n"                                      ""       
##   "Mortality = Died (%)"                   ""       
##   "NPR Linkage = Linked (%)"               ""       
##   "Sex = Male (%)"                         ""       
##   "Age > 40 = 1 (%)"                       ""       
##   "Age (median [IQR])"                     "nonnorm"
##   "Enumeration CD4 (median [IQR])"         "nonnorm"
##   "Enumeration CD4 Categorical (%)"        ""       
##   "   101 - 199"                           ""       
##   "   51 - 100"                            ""       
##   "   0 - 50"                              ""       
##   "Viral Load at Enumeration (%)"          ""       
##   "   < 100"                               ""       
##   "   100 -1000"                           ""       
##   "   > 1000"                              ""       
##   "Enumeration VL Not Done = 1 (%)"        ""       
##   "ART (%)"                                ""       
##   "   On ART"                              ""       
##   "   LTFU"                                ""       
##   "   ART Naive"                           ""       
##   "ART Naive = Subsequently Initiated (%)" ""       
##   "Time to Initiation (median [IQR])"      "nonnorm"
##   "LTFU = Re-initiated (%)"                ""       
##   "Time to Re-initiation (median [IQR])"   "nonnorm"
##   "Enumerated in Hospital = 1 (%)"         ""       
##   "Current TB (PHDC) = 1 (%)"              ""       
##   "Previous TB (PHDC) = 1 (%)"             ""       
##   "Incident TB (PHDC) = 1 (%)"             ""       
##   "Current Cryptococcosis = 1 (%)"         ""       
##   "Previous Cryptococcosis = 1 (%)"        ""       
##   "Incident Cryptococcosis = 1 (%)"        ""       
##   "Age_cat (%)"                            ""       
##   "   25-35"                               ""       
##   "   18-25"                               ""       
##   "   35-45"                               ""       
##   "   >45"                                 ""
# Stratified table 3 (ART Naive/initiation status)
# Summarise myVars across the levels of the `ART Naive` column of the full
# data set, treating catVars as categorical.
tab5 <- CreateTableOne(
  data = Data,
  strata = "ART Naive",
  vars = myVars,
  factorVars = catVars
)
# Variables named in non_normal are reported as median [IQR]; cells are
# quoted and printed without padding spaces.
print(
  tab5,
  quote = TRUE,
  noSpaces = TRUE,
  nonnormal = non_normal
)
##                                           "Stratified by ART Naive"
##  ""                                        "Never Initiaited"      
##   "n"                                      "266"                   
##   "Mortality = Died (%)"                   "51 (19.2)"             
##   "NPR Linkage = Linked (%)"               "203 (76.3)"            
##   "Sex = Male (%)"                         "138 (51.9)"            
##   "Age > 40 = 1 (%)"                       "82 (30.8)"             
##   "Age (median [IQR])"                     "35.03 [29.84, 42.39]"  
##   "Enumeration CD4 (median [IQR])"         "103.50 [53.50, 154.00]"
##   "Enumeration CD4 Categorical (%)"        ""                      
##   "   101 - 199"                           "134 (50.4)"            
##   "   51 - 100"                            "68 (25.6)"             
##   "   0 - 50"                              "64 (24.1)"             
##   "Viral Load at Enumeration (%)"          ""                      
##   "   < 100"                               "9 (27.3)"              
##   "   100 -1000"                           "0 (0.0)"               
##   "   > 1000"                              "24 (72.7)"             
##   "Enumeration VL Not Done = 1 (%)"        "233 (87.6)"            
##   "ART (%)"                                ""                      
##   "   On ART"                              "0 (0.0)"               
##   "   LTFU"                                "0 (0.0)"               
##   "   ART Naive"                           "266 (100.0)"           
##   "ART Naive = Subsequently Initiated (%)" "0 (0.0)"               
##   "Time to Initiation (median [IQR])"      "NA [NA, NA]"           
##   "LTFU = Re-initiated (%)"                "0 (NaN)"               
##   "Time to Re-initiation (median [IQR])"   "NA [NA, NA]"           
##   "Enumerated in Hospital = 1 (%)"         "75 (28.2)"             
##   "Current TB (PHDC) = 1 (%)"              "111 (41.7)"            
##   "Previous TB (PHDC) = 1 (%)"             "72 (27.1)"             
##   "Incident TB (PHDC) = 1 (%)"             "41 (15.4)"             
##   "Current Cryptococcosis = 1 (%)"         "12 (4.5)"              
##   "Previous Cryptococcosis = 1 (%)"        "0 (0.0)"               
##   "Incident Cryptococcosis = 1 (%)"        "3 (1.1)"               
##   "Age_cat (%)"                            ""                      
##   "   25-35"                               "115 (43.2)"            
##   "   18-25"                               "17 (6.4)"              
##   "   35-45"                               "88 (33.1)"             
##   "   >45"                                 "46 (17.3)"             
##                                           "Stratified by ART Naive"
##  ""                                        "Subsequently Initiated" "p"     
##   "n"                                      "5419"                   ""      
##   "Mortality = Died (%)"                   "344 (6.3)"              "<0.001"
##   "NPR Linkage = Linked (%)"               "4525 (83.5)"            "0.003" 
##   "Sex = Male (%)"                         "2599 (48.0)"            "0.236" 
##   "Age > 40 = 1 (%)"                       "1606 (29.6)"            "0.729" 
##   "Age (median [IQR])"                     "34.50 [29.23, 41.48]"   "0.174" 
##   "Enumeration CD4 (median [IQR])"         "108.00 [57.00, 157.00]" "0.300" 
##   "Enumeration CD4 Categorical (%)"        ""                       "0.464" 
##   "   101 - 199"                           "2940 (54.3)"            ""      
##   "   51 - 100"                            "1281 (23.6)"            ""      
##   "   0 - 50"                              "1198 (22.1)"            ""      
##   "Viral Load at Enumeration (%)"          ""                       "0.034" 
##   "   < 100"                               "94 (19.4)"              ""      
##   "   100 -1000"                           "80 (16.5)"              ""      
##   "   > 1000"                              "311 (64.1)"             ""      
##   "Enumeration VL Not Done = 1 (%)"        "4934 (91.1)"            "0.071" 
##   "ART (%)"                                ""                       "NaN"   
##   "   On ART"                              "0 (0.0)"                ""      
##   "   LTFU"                                "0 (0.0)"                ""      
##   "   ART Naive"                           "5419 (100.0)"           ""      
##   "ART Naive = Subsequently Initiated (%)" "5419 (100.0)"           "<0.001"
##   "Time to Initiation (median [IQR])"      "10.00 [0.00, 28.00]"    "NA"    
##   "LTFU = Re-initiated (%)"                "0 (NaN)"                "NA"    
##   "Time to Re-initiation (median [IQR])"   "NA [NA, NA]"            "NA"    
##   "Enumerated in Hospital = 1 (%)"         "567 (10.5)"             "<0.001"
##   "Current TB (PHDC) = 1 (%)"              "1644 (30.3)"            "<0.001"
##   "Previous TB (PHDC) = 1 (%)"             "407 (7.5)"              "<0.001"
##   "Incident TB (PHDC) = 1 (%)"             "701 (12.9)"             "0.281" 
##   "Current Cryptococcosis = 1 (%)"         "77 (1.4)"               "<0.001"
##   "Previous Cryptococcosis = 1 (%)"        "4 (0.1)"                "1.000" 
##   "Incident Cryptococcosis = 1 (%)"        "57 (1.1)"               "1.000" 
##   "Age_cat (%)"                            ""                       "0.309" 
##   "   25-35"                               "2298 (42.4)"            ""      
##   "   18-25"                               "535 (9.9)"              ""      
##   "   35-45"                               "1708 (31.5)"            ""      
##   "   >45"                                 "878 (16.2)"             ""      
##                                           "Stratified by ART Naive"
##  ""                                        "test"   
##   "n"                                      ""       
##   "Mortality = Died (%)"                   ""       
##   "NPR Linkage = Linked (%)"               ""       
##   "Sex = Male (%)"                         ""       
##   "Age > 40 = 1 (%)"                       ""       
##   "Age (median [IQR])"                     "nonnorm"
##   "Enumeration CD4 (median [IQR])"         "nonnorm"
##   "Enumeration CD4 Categorical (%)"        ""       
##   "   101 - 199"                           ""       
##   "   51 - 100"                            ""       
##   "   0 - 50"                              ""       
##   "Viral Load at Enumeration (%)"          ""       
##   "   < 100"                               ""       
##   "   100 -1000"                           ""       
##   "   > 1000"                              ""       
##   "Enumeration VL Not Done = 1 (%)"        ""       
##   "ART (%)"                                ""       
##   "   On ART"                              ""       
##   "   LTFU"                                ""       
##   "   ART Naive"                           ""       
##   "ART Naive = Subsequently Initiated (%)" ""       
##   "Time to Initiation (median [IQR])"      "nonnorm"
##   "LTFU = Re-initiated (%)"                ""       
##   "Time to Re-initiation (median [IQR])"   "nonnorm"
##   "Enumerated in Hospital = 1 (%)"         ""       
##   "Current TB (PHDC) = 1 (%)"              ""       
##   "Previous TB (PHDC) = 1 (%)"             ""       
##   "Incident TB (PHDC) = 1 (%)"             ""       
##   "Current Cryptococcosis = 1 (%)"         ""       
##   "Previous Cryptococcosis = 1 (%)"        ""       
##   "Incident Cryptococcosis = 1 (%)"        ""       
##   "Age_cat (%)"                            ""       
##   "   25-35"                               ""       
##   "   18-25"                               ""       
##   "   35-45"                               ""       
##   "   >45"                                 ""
# Stratified table 4 (ART): the myVars characteristics summarised per ART group
tab6 <- CreateTableOne(
  data = Data,
  vars = myVars,
  factorVars = catVars,
  strata = "ART"
)
# Variables named in non_normal are reported as median [IQR]
print(
  tab6,
  nonnormal = non_normal,
  noSpaces = TRUE,
  quote = TRUE
)
##                                           "Stratified by ART"
##  ""                                        "On ART"                
##   "n"                                      "2379"                  
##   "Mortality = Died (%)"                   "346 (14.5)"            
##   "NPR Linkage = Linked (%)"               "1957 (82.3)"           
##   "Sex = Male (%)"                         "1003 (42.2)"           
##   "Age > 40 = 1 (%)"                       "1062 (44.6)"           
##   "Age (median [IQR])"                     "38.56 [32.65, 45.91]"  
##   "Enumeration CD4 (median [IQR])"         "126.00 [74.00, 166.00]"
##   "Enumeration CD4 Categorical (%)"        ""                      
##   "   101 - 199"                           "1510 (63.5)"           
##   "   51 - 100"                            "489 (20.6)"            
##   "   0 - 50"                              "380 (16.0)"            
##   "Viral Load at Enumeration (%)"          ""                      
##   "   < 100"                               "880 (44.0)"            
##   "   100 -1000"                           "174 (8.7)"             
##   "   > 1000"                              "948 (47.4)"            
##   "Enumeration VL Not Done = 1 (%)"        "377 (15.8)"            
##   "ART (%)"                                ""                      
##   "   On ART"                              "2379 (100.0)"          
##   "   LTFU"                                "0 (0.0)"               
##   "   ART Naive"                           "0 (0.0)"               
##   "ART Naive = Subsequently Initiated (%)" "0 (NaN)"               
##   "Time to Initiation (median [IQR])"      "NA [NA, NA]"           
##   "LTFU = Re-initiated (%)"                "0 (NaN)"               
##   "Time to Re-initiation (median [IQR])"   "NA [NA, NA]"           
##   "Enumerated in Hospital = 1 (%)"         "566 (23.8)"            
##   "Current TB (PHDC) = 1 (%)"              "367 (15.4)"            
##   "Previous TB (PHDC) = 1 (%)"             "1410 (59.3)"           
##   "Incident TB (PHDC) = 1 (%)"             "437 (18.4)"            
##   "Current Cryptococcosis = 1 (%)"         "44 (1.8)"              
##   "Previous Cryptococcosis = 1 (%)"        "63 (2.6)"              
##   "Incident Cryptococcosis = 1 (%)"        "49 (2.1)"              
##   "Age_cat (%)"                            ""                      
##   "   25-35"                               "713 (30.0)"            
##   "   18-25"                               "123 (5.2)"             
##   "   35-45"                               "890 (37.4)"            
##   "   >45"                                 "653 (27.4)"            
##                                           "Stratified by ART"
##  ""                                        "LTFU"                  
##   "n"                                      "5280"                  
##   "Mortality = Died (%)"                   "608 (11.5)"            
##   "NPR Linkage = Linked (%)"               "4467 (84.6)"           
##   "Sex = Male (%)"                         "1988 (37.7)"           
##   "Age > 40 = 1 (%)"                       "1753 (33.2)"           
##   "Age (median [IQR])"                     "36.02 [30.74, 42.43]"  
##   "Enumeration CD4 (median [IQR])"         "106.00 [52.00, 155.00]"
##   "Enumeration CD4 Categorical (%)"        ""                      
##   "   101 - 199"                           "2784 (52.7)"           
##   "   51 - 100"                            "1235 (23.4)"           
##   "   0 - 50"                              "1261 (23.9)"           
##   "Viral Load at Enumeration (%)"          ""                      
##   "   < 100"                               "328 (21.7)"            
##   "   100 -1000"                           "177 (11.7)"            
##   "   > 1000"                              "1009 (66.6)"           
##   "Enumeration VL Not Done = 1 (%)"        "3766 (71.3)"           
##   "ART (%)"                                ""                      
##   "   On ART"                              "0 (0.0)"               
##   "   LTFU"                                "5280 (100.0)"          
##   "   ART Naive"                           "0 (0.0)"               
##   "ART Naive = Subsequently Initiated (%)" "0 (NaN)"               
##   "Time to Initiation (median [IQR])"      "NA [NA, NA]"           
##   "LTFU = Re-initiated (%)"                "4853 (91.9)"           
##   "Time to Re-initiation (median [IQR])"   "7.00 [0.00, 25.00]"    
##   "Enumerated in Hospital = 1 (%)"         "855 (16.2)"            
##   "Current TB (PHDC) = 1 (%)"              "1278 (24.2)"           
##   "Previous TB (PHDC) = 1 (%)"             "2505 (47.4)"           
##   "Incident TB (PHDC) = 1 (%)"             "1033 (19.6)"           
##   "Current Cryptococcosis = 1 (%)"         "92 (1.7)"              
##   "Previous Cryptococcosis = 1 (%)"        "52 (1.0)"              
##   "Incident Cryptococcosis = 1 (%)"        "103 (2.0)"             
##   "Age_cat (%)"                            ""                      
##   "   25-35"                               "2028 (38.4)"           
##   "   18-25"                               "356 (6.7)"             
##   "   35-45"                               "1951 (37.0)"           
##   "   >45"                                 "945 (17.9)"            
##                                           "Stratified by ART"
##  ""                                        "ART Naive"              "p"     
##   "n"                                      "5685"                   ""      
##   "Mortality = Died (%)"                   "395 (6.9)"              "<0.001"
##   "NPR Linkage = Linked (%)"               "4728 (83.2)"            "0.021" 
##   "Sex = Male (%)"                         "2737 (48.1)"            "<0.001"
##   "Age > 40 = 1 (%)"                       "1688 (29.7)"            "<0.001"
##   "Age (median [IQR])"                     "34.53 [29.29, 41.53]"   "<0.001"
##   "Enumeration CD4 (median [IQR])"         "108.00 [56.00, 157.00]" "<0.001"
##   "Enumeration CD4 Categorical (%)"        ""                       "<0.001"
##   "   101 - 199"                           "3074 (54.1)"            ""      
##   "   51 - 100"                            "1349 (23.7)"            ""      
##   "   0 - 50"                              "1262 (22.2)"            ""      
##   "Viral Load at Enumeration (%)"          ""                       "<0.001"
##   "   < 100"                               "103 (19.9)"             ""      
##   "   100 -1000"                           "80 (15.4)"              ""      
##   "   > 1000"                              "335 (64.7)"             ""      
##   "Enumeration VL Not Done = 1 (%)"        "5167 (90.9)"            "<0.001"
##   "ART (%)"                                ""                       "<0.001"
##   "   On ART"                              "0 (0.0)"                ""      
##   "   LTFU"                                "0 (0.0)"                ""      
##   "   ART Naive"                           "5685 (100.0)"           ""      
##   "ART Naive = Subsequently Initiated (%)" "5419 (95.3)"            "NaN"   
##   "Time to Initiation (median [IQR])"      "10.00 [0.00, 28.00]"    "NA"    
##   "LTFU = Re-initiated (%)"                "0 (NaN)"                "NaN"   
##   "Time to Re-initiation (median [IQR])"   "NA [NA, NA]"            "NA"    
##   "Enumerated in Hospital = 1 (%)"         "642 (11.3)"             "<0.001"
##   "Current TB (PHDC) = 1 (%)"              "1755 (30.9)"            "<0.001"
##   "Previous TB (PHDC) = 1 (%)"             "479 (8.4)"              "<0.001"
##   "Incident TB (PHDC) = 1 (%)"             "742 (13.1)"             "<0.001"
##   "Current Cryptococcosis = 1 (%)"         "89 (1.6)"               "0.612" 
##   "Previous Cryptococcosis = 1 (%)"        "4 (0.1)"                "<0.001"
##   "Incident Cryptococcosis = 1 (%)"        "60 (1.1)"               "<0.001"
##   "Age_cat (%)"                            ""                       "<0.001"
##   "   25-35"                               "2413 (42.4)"            ""      
##   "   18-25"                               "552 (9.7)"              ""      
##   "   35-45"                               "1796 (31.6)"            ""      
##   "   >45"                                 "924 (16.3)"             ""      
##                                           "Stratified by ART"
##  ""                                        "test"   
##   "n"                                      ""       
##   "Mortality = Died (%)"                   ""       
##   "NPR Linkage = Linked (%)"               ""       
##   "Sex = Male (%)"                         ""       
##   "Age > 40 = 1 (%)"                       ""       
##   "Age (median [IQR])"                     "nonnorm"
##   "Enumeration CD4 (median [IQR])"         "nonnorm"
##   "Enumeration CD4 Categorical (%)"        ""       
##   "   101 - 199"                           ""       
##   "   51 - 100"                            ""       
##   "   0 - 50"                              ""       
##   "Viral Load at Enumeration (%)"          ""       
##   "   < 100"                               ""       
##   "   100 -1000"                           ""       
##   "   > 1000"                              ""       
##   "Enumeration VL Not Done = 1 (%)"        ""       
##   "ART (%)"                                ""       
##   "   On ART"                              ""       
##   "   LTFU"                                ""       
##   "   ART Naive"                           ""       
##   "ART Naive = Subsequently Initiated (%)" ""       
##   "Time to Initiation (median [IQR])"      "nonnorm"
##   "LTFU = Re-initiated (%)"                ""       
##   "Time to Re-initiation (median [IQR])"   "nonnorm"
##   "Enumerated in Hospital = 1 (%)"         ""       
##   "Current TB (PHDC) = 1 (%)"              ""       
##   "Previous TB (PHDC) = 1 (%)"             ""       
##   "Incident TB (PHDC) = 1 (%)"             ""       
##   "Current Cryptococcosis = 1 (%)"         ""       
##   "Previous Cryptococcosis = 1 (%)"        ""       
##   "Incident Cryptococcosis = 1 (%)"        ""       
##   "Age_cat (%)"                            ""       
##   "   25-35"                               ""       
##   "   18-25"                               ""       
##   "   35-45"                               ""       
##   "   >45"                                 ""

Survival analysis

### Overall ART KM
# Kaplan-Meier curves of time to death among linked patients, one curve per
# ART group, with confidence bands, a p-value, and number-at-risk and
# cumulative-event tables underneath. The x-axis is limited to the first
# 1000 days with a tick every 90 days.
ggsurvplot(
  fit = survfit(Surv(Time, Censored) ~ ART, data = Linked_Data),
  risk.table = TRUE,
  cumevents = TRUE,
  conf.int = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  pval = TRUE,
  palette = "npg",
  linetype = "solid",
  risk.table.height = 0.2,
  cumevents.height = 0.2,
  xlim = c(0, 1000),
  xlab = "Days",
  break.time.by = 90,
  ylim = c(0, 1),
  ylab = "Survival",
  legend = "top",
  # Title typo fixed ("stratifed" -> "stratified"); it is printed on the figure
  title = "Time to death among linked patients stratified by ART exposure at enumeration"
)

### Zoomed in Overall ART KM
# Zoomed-in companion to the overall ART Kaplan-Meier plot: same fit, but the
# y-axis is restricted to 0.8-1 and the legend, tables, p-value and axis
# labels are switched off. Ticks every 180 days.
# Logical flags are written as TRUE/FALSE because `T` and `F` can be reassigned.
ggsurvplot(
  fit = survfit(Surv(Time, Censored) ~ ART, data = Linked_Data),
  legend = "none",
  risk.table = FALSE,
  cumevents = FALSE,
  conf.int = TRUE,
  pval = FALSE,
  palette = "npg",
  linetype = "solid",
  xlim = c(0, 1000),
  break.time.by = 180,
  ylim = c(0.8, 1),
  xlab = "",
  ylab = ""
)

### VL KM
### On ART by VL KM
# Restrict to linked patients who were on ART at enumeration, then copy the
# enumeration viral-load category into a short column name for the KM formula.
#
# The linkage filter has to be a comparison (`==`) on the `NPR Linkage`
# column. Written as `NPR linkage` = "Linked" it is handed to subset() as an
# unused named argument, so no rows are dropped and unlinked patients stay in.
# NOTE(review): the counts printed below were produced before this filter
# took effect; re-knit the document to refresh them.
L_Data <- subset(Data, `NPR Linkage` == "Linked")
On_ART <- subset(L_Data, ART == "On ART")
On_ART$VL <- On_ART$`Viral Load at Enumeration`
summary(On_ART$VL)
##     < 100 100 -1000    > 1000      NA's 
##       880       174       948       377
# Kaplan-Meier curves of time to death for the On_ART subset, one curve per
# enumeration viral-load category, with confidence bands, a p-value, and
# number-at-risk and cumulative-event tables underneath. The x-axis is
# limited to the first 1000 days with a tick every 90 days.
ggsurvplot(
  fit = survfit(Surv(Time, Censored) ~ VL, data = On_ART),
  risk.table = TRUE,
  cumevents = TRUE,
  conf.int = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  pval = TRUE,
  palette = "npg",
  linetype = "solid",
  risk.table.height = 0.2,
  cumevents.height = 0.2,
  xlim = c(0, 1000),
  xlab = "Days",
  break.time.by = 90,
  ylim = c(0, 1),
  ylab = "Survival",
  legend = "top",
  title = "Time to death among patients on ART at enumeration stratified by enumeration viral load"
)

# VL zoomed in 
# Zoomed-in companion to the viral-load Kaplan-Meier plot: same fit, but the
# y-axis is restricted to 0.8-1 and the legend, tables, p-value and axis
# labels are switched off. Ticks every 180 days.
# Logical flags are written as TRUE/FALSE because `T` and `F` can be reassigned.
ggsurvplot(
  fit = survfit(Surv(Time, Censored) ~ VL, data = On_ART),
  legend = "none",
  risk.table = FALSE,
  cumevents = FALSE,
  conf.int = TRUE,
  pval = FALSE,
  palette = "npg",
  linetype = "solid",
  xlim = c(0, 1000),
  break.time.by = 180,
  ylim = c(0.8, 1),
  xlab = "",
  ylab = ""
)

Modelling

Pre-process data again

## Processing data
# Give patients without an enumeration viral-load category their own code (3)
Cohort$VL_E_Cat <- replace(Cohort$VL_E_Cat, is.na(Cohort$VL_E_Cat), 3)

# Make sure the enumeration CD4 count is stored as a number
Cohort[["Enumeration_CD4"]] <- as.numeric(Cohort[["Enumeration_CD4"]])

# Keep only the columns used from here on
Data <- select(
  Cohort,
  # identifier, outcome and follow-up time
  study_id, Censored, Time,
  # enumeration measurements
  Enumeration_CD4, VL_E_Cat, E_CD4_Cat, Hospital_Enumerations,
  # cryptococcosis flags
  Incident_Crypto, Prevalent_Crypto, Previous_Crypto,
  # ART exposure and (re-)initiation
  ART_Exp_C, Subs_Initiated, Time_To_First_ART, Reinitiated, Time_Reinitiation,
  # demographics and linkage
  Sex_B, Age, Age_Cat, Linked,
  # comorbidities
  Current_Diabetes, Current_Hypertension, Current_CKD,
  # tuberculosis flags
  Current_TB_PHDC, Previous_TB_PHDC, Incident_TB_PHDC
)
summary(Data)
##     study_id            Censored           Time        Enumeration_CD4
##  Min.   :1.001e+09   Min.   :0.0000   Min.   :   0.0   Min.   :  0.0  
##  1st Qu.:3.194e+09   1st Qu.:0.0000   1st Qu.: 489.8   1st Qu.: 57.0  
##  Median :5.430e+09   Median :0.0000   Median : 895.5   Median :111.0  
##  Mean   :5.441e+09   Mean   :0.1011   Mean   : 856.1   Mean   :106.9  
##  3rd Qu.:7.664e+09   3rd Qu.:0.0000   3rd Qu.:1272.0   3rd Qu.:158.0  
##  Max.   :9.998e+09   Max.   :1.0000   Max.   :1620.0   Max.   :199.0  
##                                                                       
##     VL_E_Cat       E_CD4_Cat      Hospital_Enumerations Incident_Crypto  
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000        Min.   :0.00000  
##  1st Qu.:2.000   1st Qu.:0.0000   1st Qu.:0.0000        1st Qu.:0.00000  
##  Median :3.000   Median :0.0000   Median :0.0000        Median :0.00000  
##  Mean   :2.469   Mean   :0.6654   Mean   :0.1546        Mean   :0.01589  
##  3rd Qu.:3.000   3rd Qu.:1.0000   3rd Qu.:0.0000        3rd Qu.:0.00000  
##  Max.   :3.000   Max.   :2.0000   Max.   :1.0000        Max.   :1.00000  
##                                                                          
##  Prevalent_Crypto  Previous_Crypto     ART_Exp_C         Subs_Initiated 
##  Min.   :0.00000   Min.   :0.000000   Length:13344       Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.000000   Class :character   1st Qu.:1.000  
##  Median :0.00000   Median :0.000000   Mode  :character   Median :1.000  
##  Mean   :0.01686   Mean   :0.008918                      Mean   :0.953  
##  3rd Qu.:0.00000   3rd Qu.:0.000000                      3rd Qu.:1.000  
##  Max.   :1.00000   Max.   :1.000000                      Max.   :1.000  
##                                                          NA's   :7659   
##  Time_To_First_ART  Reinitiated    Time_Reinitiation     Sex_B       
##  Min.   :   0.00   Min.   :0.000   Min.   :   0.0    Min.   :0.0000  
##  1st Qu.:   0.00   1st Qu.:1.000   1st Qu.:   0.0    1st Qu.:0.0000  
##  Median :  10.00   Median :1.000   Median :   7.0    Median :0.0000  
##  Mean   :  49.62   Mean   :0.919   Mean   :  39.4    Mean   :0.4293  
##  3rd Qu.:  28.00   3rd Qu.:1.000   3rd Qu.:  25.0    3rd Qu.:1.0000  
##  Max.   :1503.00   Max.   :1.000   Max.   :1287.0    Max.   :1.0000  
##  NA's   :7925      NA's   :8064    NA's   :8491                      
##       Age           Age_Cat           Linked       Current_Diabetes 
##  Min.   :18.01   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:30.34   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.00000  
##  Median :35.78   Median :0.0000   Median :1.0000   Median :0.00000  
##  Mean   :37.08   Mean   :0.3375   Mean   :0.8357   Mean   :0.02608  
##  3rd Qu.:42.70   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :89.25   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##                                                                     
##  Current_Hypertension  Current_CKD      Current_TB_PHDC  Previous_TB_PHDC
##  Min.   :0.00000      Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000      1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000      Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.08933      Mean   :0.01963   Mean   :0.2548   Mean   :0.3293  
##  3rd Qu.:0.00000      3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.00000      Max.   :1.00000   Max.   :1.0000   Max.   :1.0000  
##                                                                          
##  Incident_TB_PHDC
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.1658  
##  3rd Qu.:0.0000  
##  Max.   :1.0000  
## 
# Patient counts per raw ART exposure category
table(Data[["ART_Exp_C"]])
## 
##   LTFU  Naive On ART 
##   5280   5685   2379
# Patient counts per binary age category
table(Data[["Age_Cat"]])
## 
##    0    1 
## 8841 4503
# Build the human-readable analysis variables from the coded cohort columns.
#
# Every factor now names its `levels` explicitly (as the ART line already
# did), so each label is tied to a specific code instead of to the sorted set
# of values that happen to be present in the data.
# NOTE(review): the explicit levels assume binary flags are coded 0/1, the CD4
# band 0-2 and the viral-load band 0-3, which matches the ranges printed by
# summary(Data); confirm against the data dictionary.
#
# The order of the assignments is left unchanged because within() derives the
# order of the new columns from it.
Data <- within(Data, {
  `Viral Load at Enumeration` <- factor(
    VL_E_Cat,
    levels = 0:3,
    labels = c("< 100", "100 -1000", "> 1000", "None")
  )
  ART <- factor(
    ART_Exp_C,
    levels = c("On ART", "LTFU", "Naive"),
    labels = c("On ART", "LTFU", "ART Naive")
  )
  Disengaged <- factor(
    Reinitiated,
    levels = c(0, 1),
    labels = c("Never Re-initiated", "Re-initiated")
  )
  # NOTE(review): "Never Initiaited" is misspelled; the label is left as-is in
  # case other code matches on the exact string.
  `ART Naive` <- factor(
    Subs_Initiated,
    levels = c(0, 1),
    labels = c("Never Initiaited", "Subsequently Initiated")
  )
  `Enumeration CD4` <- Enumeration_CD4
  `Enumeration CD4 Categorical` <- factor(
    E_CD4_Cat,
    levels = 0:2,
    labels = c("101 - 199", "51 - 100", "0 - 50")
  )
  `Time in Cohort` <- Time
  `Time to Initiation` <- Time_To_First_ART
  `Time to Re-initiation` <- Time_Reinitiation
  # Censored is the event indicator: 1 is labelled "Died"
  Mortality <- factor(Censored, levels = c(0, 1), labels = c("Survived", "Died"))
  Sex <- factor(Sex_B, levels = c(0, 1), labels = c("Female", "Male"))
  `Sex (Male)` <- Sex_B
  `Age` <- Age
  `Incident Cryptococcosis` <- Incident_Crypto
  `Current Cryptococcosis` <- Prevalent_Crypto
  `Previous Cryptococcosis` <- Previous_Crypto
  `Enumerated in Hospital` <- Hospital_Enumerations
  `NPR Linkage` <- factor(Linked, levels = c(0, 1), labels = c("Unlinked", "Linked"))
  `Diabetic` <- Current_Diabetes
  `Hypertension` <- Current_Hypertension
  `Chronic Kidney Disease` <- Current_CKD
  `Current TB (PHDC)` <- Current_TB_PHDC
  `Previous TB (PHDC)` <- Previous_TB_PHDC
  `Incident TB (PHDC)` <- Incident_TB_PHDC
})

# Keep the labelled analysis columns (plus the raw Censored and VL_E_Cat
# codes) in a fixed order
Data <- select(
  Data,
  study_id,
  Time,
  `Viral Load at Enumeration`,
  `Enumeration CD4`,
  `Enumerated in Hospital`,
  `Enumeration CD4 Categorical`,
  `NPR Linkage`,
  Mortality,
  Sex,
  `Sex (Male)`,
  Age,
  `Incident TB (PHDC)`,
  `Previous TB (PHDC)`,
  `Current TB (PHDC)`,
  `Previous Cryptococcosis`,
  `Current Cryptococcosis`,
  `Incident Cryptococcosis`,
  `Chronic Kidney Disease`,
  Hypertension,
  Diabetic,
  `ART Naive`,
  Disengaged,
  ART,
  `Time to Initiation`,
  `Time to Re-initiation`,
  Censored,
  VL_E_Cat
)


# Keep only the rows whose NPR linkage status is "Linked"
Linked_Data <- Data[which(Data[["NPR Linkage"]] == "Linked"), ]

# Four age bands with right-closed intervals: (0,25], (25,35], (35,45], (45,100]
Linked_Data$AgeCat_4 <- cut(Linked_Data$Age, breaks = c(0, 25, 35, 45, 100))
summary(Linked_Data$AgeCat_4)
##   (0,25]  (25,35]  (35,45] (45,100] 
##      895     4401     3831     2025
# Numeric code for the age band: 0 = (0,25], 1 = (25,35], 2 = (35,45],
# 3 = (45,100]; anything else (including a missing band) becomes NA
Linked_Data$Age_Cat_4 <- match(
  as.character(Linked_Data$AgeCat_4),
  c("(0,25]", "(25,35]", "(35,45]", "(45,100]")
) - 1
table(Linked_Data$Age_Cat_4)
## 
##    0    1    2    3 
##  895 4401 3831 2025
# Assemble the modelling data set from the linked cohort.
Model_Data <- Linked_Data[, c("study_id", "Censored", "Time", "Age_Cat_4", "ART",
                              "Sex (Male)", "Enumeration CD4 Categorical",
                              "Previous TB (PHDC)", "Incident TB (PHDC)",
                              "Current TB (PHDC)", "Previous Cryptococcosis",
                              "Viral Load at Enumeration",
                              "Enumerated in Hospital")]

# Shorten column names for the model output. Renaming is done by name rather
# than by column position, so it keeps working (or fails loudly) if the column
# list above is ever reordered or extended.
Model_Data <- dplyr::rename(
  Model_Data,
  Age = Age_Cat_4,
  CD4 = `Enumeration CD4 Categorical`,
  `Previous TB` = `Previous TB (PHDC)`,
  `Incident TB` = `Incident TB (PHDC)`,
  `Current TB` = `Current TB (PHDC)`
)

# Age band codes 0-3 become labelled factor levels; 25-35 is the reference
# category for the models.
Model_Data$Age <- factor(
  Model_Data$Age,
  levels = 0:3,
  labels = c("18-25", "25-35", "35-45", ">45")
)
Model_Data$Age <- relevel(Model_Data$Age, ref = "25-35")

# Shift follow-up time by half a day.
# NOTE(review): presumably so that patients with Time == 0 have a positive
# survival time; confirm the intent.
Model_Data$Time <- Model_Data$Time + 0.5

head(Model_Data)
## # A tibble: 6 × 13
##     study_id Censored  Time Age   ART       `Sex (Male)` CD4       `Previous TB`
##        <dbl>    <dbl> <dbl> <fct> <fct>            <dbl> <fct>             <dbl>
## 1 1001292400        0  874. 35-45 ART Naive            1 101 - 199             0
## 2 1001394208        0 1296. 25-35 ART Naive            1 101 - 199             0
## 3 1001490301        0 1462. 35-45 On ART               0 0 - 50                1
## 4 1001698318        0 1458. 18-25 On ART               1 101 - 199             0
## 5 1002697417        0 1472. 35-45 On ART               1 101 - 199             0
## 6 1002894013        0 1546. 25-35 On ART               0 101 - 199             0
## # ℹ 5 more variables: `Incident TB` <dbl>, `Current TB` <dbl>,
## #   `Previous Cryptococcosis` <dbl>, `Viral Load at Enumeration` <fct>,
## #   `Enumerated in Hospital` <dbl>

CPH Model 1

This model includes the following variables: ART status, CD4 category at enumeration, sex, age, previous TB and previous cryptococcosis.

## Call:
## coxph(formula = Surv(Time, Censored) ~ ART + `Sex (Male)` + Age + 
##     CD4 + `Previous TB` + `Previous Cryptococcosis`, data = Model_Data)
## 
##   n= 11152, number of events= 1349 
## 
##                               coef exp(coef) se(coef)      z Pr(>|z|)    
## ARTLTFU                   -0.19589   0.82210  0.06875 -2.849  0.00438 ** 
## ARTART Naive              -0.51801   0.59570  0.08381 -6.181 6.37e-10 ***
## `Sex (Male)`               0.03560   1.03625  0.05734  0.621  0.53464    
## Age18-25                   0.17795   1.19476  0.11642  1.528  0.12640    
## Age35-45                   0.17966   1.19681  0.06784  2.648  0.00809 ** 
## Age>45                     0.56229   1.75468  0.07445  7.553 4.26e-14 ***
## CD451 - 100                0.48027   1.61651  0.07156  6.711 1.93e-11 ***
## CD40 - 50                  1.06478   2.90020  0.06364 16.730  < 2e-16 ***
## `Previous TB`              0.38818   1.47430  0.06364  6.100 1.06e-09 ***
## `Previous Cryptococcosis` -0.12524   0.88228  0.24566 -0.510  0.61018    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                           exp(coef) exp(-coef) lower .95 upper .95
## ARTLTFU                      0.8221     1.2164    0.7185    0.9407
## ARTART Naive                 0.5957     1.6787    0.5055    0.7021
## `Sex (Male)`                 1.0362     0.9650    0.9261    1.1595
## Age18-25                     1.1948     0.8370    0.9510    1.5010
## Age35-45                     1.1968     0.8356    1.0478    1.3670
## Age>45                       1.7547     0.5699    1.5165    2.0303
## CD451 - 100                  1.6165     0.6186    1.4050    1.8599
## CD40 - 50                    2.9002     0.3448    2.5601    3.2855
## `Previous TB`                1.4743     0.6783    1.3014    1.6701
## `Previous Cryptococcosis`    0.8823     1.1334    0.5451    1.4280
## 
## Concordance= 0.689  (se = 0.007 )
## Likelihood ratio test= 516.9  on 10 df,   p=<2e-16
## Wald test            = 527.7  on 10 df,   p=<2e-16
## Score (logrank) test = 562.9  on 10 df,   p=<2e-16

It is important to note that the diagnostics of model 1 raise concerns about the PH assumption:

The statistically significant variables are age, CD4 count and ART status. A further analysis of these features is conducted after the univariate modelling. The plots show that most of the hazard divergence happens within roughly the first year (and is most pronounced in roughly the first 90 days). Because p-values are not the best way to evaluate the PH assumption in a sample this large, the PH assumption has been evaluated for these features using KM plots, log-log curves and Schoenfeld residuals. In this process it is noted that ART status is the most important violation of the PH assumption; to better unpack this, stratified models for each ART status are presented.

Stratified CPH models for each ART category

# One modelling data set per ART group
On_ART <- Model_Data[which(Model_Data$ART == "On ART"), ]
Disengaged <- Model_Data[which(Model_Data$ART == "LTFU"), ]
Naive <- Model_Data[which(Model_Data$ART == "ART Naive"), ]

# Survival response (follow-up time, event indicator) for each group
Surv_object_On_ART <- with(On_ART, Surv(time = Time, event = Censored))
Surv_object_Disengaged <- with(Disengaged, Surv(time = Time, event = Censored))
Surv_object_Naive <- with(Naive, Surv(time = Time, event = Censored))

# Stratified Surv models
# On ART, CD4 version: sex, age band, CD4 band, previous TB and previous
# cryptococcosis
surv_ART1 <- coxph(
  Surv_object_On_ART ~ `Sex (Male)` + Age + CD4 +
    `Previous TB` + `Previous Cryptococcosis`,
  data = On_ART
)
summary(surv_ART1)
## Call:
## coxph(formula = Surv_object_On_ART ~ `Sex (Male)` + Age + CD4 + 
##     `Previous TB` + `Previous Cryptococcosis`, data = On_ART)
## 
##   n= 1957, number of events= 346 
## 
##                                coef exp(coef)  se(coef)      z Pr(>|z|)    
## `Sex (Male)`              -0.052477  0.948876  0.113747 -0.461  0.64455    
## Age18-25                  -0.286261  0.751067  0.294756 -0.971  0.33146    
## Age35-45                   0.003349  1.003355  0.135910  0.025  0.98034    
## Age>45                     0.392014  1.479959  0.143009  2.741  0.00612 ** 
## CD451 - 100                0.528239  1.695943  0.133969  3.943 8.05e-05 ***
## CD40 - 50                  0.953167  2.593911  0.129839  7.341 2.12e-13 ***
## `Previous TB`              0.369100  1.446432  0.120020  3.075  0.00210 ** 
## `Previous Cryptococcosis` -0.199629  0.819035  0.339536 -0.588  0.55657    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                           exp(coef) exp(-coef) lower .95 upper .95
## `Sex (Male)`                 0.9489     1.0539    0.7593     1.186
## Age18-25                     0.7511     1.3314    0.4215     1.338
## Age35-45                     1.0034     0.9967    0.7687     1.310
## Age>45                       1.4800     0.6757    1.1182     1.959
## CD451 - 100                  1.6959     0.5896    1.3043     2.205
## CD40 - 50                    2.5939     0.3855    2.0111     3.346
## `Previous TB`                1.4464     0.6914    1.1432     1.830
## `Previous Cryptococcosis`    0.8190     1.2209    0.4210     1.593
## 
## Concordance= 0.646  (se = 0.015 )
## Likelihood ratio test= 76.35  on 8 df,   p=3e-13
## Wald test            = 78.46  on 8 df,   p=1e-13
## Score (logrank) test = 82.32  on 8 df,   p=2e-14
# On ART, viral-load version: same covariates as surv_ART1 but with the
# enumeration viral-load category in place of the CD4 band
surv_ART2 <- coxph(
  Surv_object_On_ART ~ `Sex (Male)` + Age + `Viral Load at Enumeration` +
    `Previous TB` + `Previous Cryptococcosis`,
  data = On_ART
)
summary(surv_ART2)
## Call:
## coxph(formula = Surv_object_On_ART ~ `Sex (Male)` + Age + `Viral Load at Enumeration` + 
##     `Previous TB` + `Previous Cryptococcosis`, data = On_ART)
## 
##   n= 1957, number of events= 346 
## 
##                                           coef exp(coef)  se(coef)      z
## `Sex (Male)`                          0.019700  1.019895  0.113718  0.173
## Age18-25                             -0.252933  0.776520  0.294940 -0.858
## Age35-45                             -0.004276  0.995733  0.136112 -0.031
## Age>45                                0.342815  1.408908  0.142762  2.401
## `Viral Load at Enumeration`100 -1000  0.038080  1.038815  0.243825  0.156
## `Viral Load at Enumeration`> 1000     0.408996  1.505305  0.125272  3.265
## `Viral Load at Enumeration`None       0.225725  1.253231  0.171849  1.314
## `Previous TB`                         0.407826  1.503546  0.119965  3.400
## `Previous Cryptococcosis`            -0.143137  0.866636  0.339208 -0.422
##                                      Pr(>|z|)    
## `Sex (Male)`                         0.862467    
## Age18-25                             0.391128    
## Age35-45                             0.974938    
## Age>45                               0.016337 *  
## `Viral Load at Enumeration`100 -1000 0.875892    
## `Viral Load at Enumeration`> 1000    0.001095 ** 
## `Viral Load at Enumeration`None      0.189011    
## `Previous TB`                        0.000675 ***
## `Previous Cryptococcosis`            0.673045    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                      exp(coef) exp(-coef) lower .95 upper .95
## `Sex (Male)`                            1.0199     0.9805    0.8161     1.275
## Age18-25                                0.7765     1.2878    0.4356     1.384
## Age35-45                                0.9957     1.0043    0.7626     1.300
## Age>45                                  1.4089     0.7098    1.0650     1.864
## `Viral Load at Enumeration`100 -1000    1.0388     0.9626    0.6442     1.675
## `Viral Load at Enumeration`> 1000       1.5053     0.6643    1.1776     1.924
## `Viral Load at Enumeration`None         1.2532     0.7979    0.8949     1.755
## `Previous TB`                           1.5035     0.6651    1.1885     1.902
## `Previous Cryptococcosis`               0.8666     1.1539    0.4458     1.685
## 
## Concordance= 0.586  (se = 0.016 )
## Likelihood ratio test= 35.04  on 9 df,   p=6e-05
## Wald test            = 34.35  on 9 df,   p=8e-05
## Score (logrank) test = 34.8  on 9 df,   p=6e-05
# LTFU (disengaged) group: sex, age band, CD4 band, previous TB and previous
# cryptococcosis
surv_Dis <- coxph(
  Surv_object_Disengaged ~ `Sex (Male)` + Age + CD4 +
    `Previous TB` + `Previous Cryptococcosis`,
  data = Disengaged
)
summary(surv_Dis)
## Call:
## coxph(formula = Surv_object_Disengaged ~ `Sex (Male)` + Age + 
##     CD4 + `Previous TB` + `Previous Cryptococcosis`, data = Disengaged)
## 
##   n= 4467, number of events= 608 
## 
##                               coef exp(coef) se(coef)      z Pr(>|z|)    
## `Sex (Male)`              -0.02809   0.97230  0.08856 -0.317  0.75107    
## Age18-25                   0.43421   1.54375  0.16148  2.689  0.00717 ** 
## Age35-45                   0.09664   1.10147  0.10069  0.960  0.33713    
## Age>45                     0.52086   1.68348  0.11541  4.513 6.39e-06 ***
## CD451 - 100                0.53898   1.71426  0.11172  4.824 1.41e-06 ***
## CD40 - 50                  1.22582   3.40697  0.09605 12.763  < 2e-16 ***
## `Previous TB`              0.40420   1.49811  0.08575  4.714 2.43e-06 ***
## `Previous Cryptococcosis`  0.02175   1.02199  0.35729  0.061  0.95146    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                           exp(coef) exp(-coef) lower .95 upper .95
## `Sex (Male)`                 0.9723     1.0285    0.8174     1.157
## Age18-25                     1.5437     0.6478    1.1249     2.118
## Age35-45                     1.1015     0.9079    0.9042     1.342
## Age>45                       1.6835     0.5940    1.3427     2.111
## CD451 - 100                  1.7143     0.5833    1.3771     2.134
## CD40 - 50                    3.4070     0.2935    2.8224     4.113
## `Previous TB`                1.4981     0.6675    1.2663     1.772
## `Previous Cryptococcosis`    1.0220     0.9785    0.5074     2.059
## 
## Concordance= 0.688  (se = 0.011 )
## Likelihood ratio test= 235.8  on 8 df,   p=<2e-16
## Wald test            = 237.7  on 8 df,   p=<2e-16
## Score (logrank) test = 260.7  on 8 df,   p=<2e-16
# ART-naive group: sex, age band, CD4 band and previous TB
# (previous cryptococcosis is not a covariate in this model)
surv_Naive <- coxph(
  Surv_object_Naive ~ `Sex (Male)` + Age + CD4 + `Previous TB`,
  data = Naive
)
summary(surv_Naive)
## Call:
## coxph(formula = Surv_object_Naive ~ `Sex (Male)` + Age + CD4 + 
##     `Previous TB`, data = Naive)
## 
##   n= 4728, number of events= 395 
## 
##                 coef exp(coef) se(coef)     z Pr(>|z|)    
## `Sex (Male)`  0.2298    1.2583   0.1037 2.215 0.026750 *  
## Age18-25      0.1452    1.1563   0.2083 0.697 0.485841    
## Age35-45      0.4542    1.5749   0.1246 3.647 0.000266 ***
## Age>45        0.8127    2.2539   0.1356 5.995 2.04e-09 ***
## CD451 - 100   0.3755    1.4557   0.1304 2.879 0.003994 ** 
## CD40 - 50     0.8974    2.4532   0.1173 7.650 2.00e-14 ***
## `Previous TB` 0.4299    1.5370   0.1530 2.810 0.004947 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## `Sex (Male)`      1.258     0.7947    1.0268     1.542
## Age18-25          1.156     0.8648    0.7686     1.739
## Age35-45          1.575     0.6350    1.2338     2.010
## Age>45            2.254     0.4437    1.7280     2.940
## CD451 - 100       1.456     0.6870    1.1273     1.880
## CD40 - 50         2.453     0.4076    1.9493     3.087
## `Previous TB`     1.537     0.6506    1.1389     2.074
## 
## Concordance= 0.676  (se = 0.014 )
## Likelihood ratio test= 122.2  on 7 df,   p=<2e-16
## Wald test            = 127.9  on 7 df,   p=<2e-16
## Score (logrank) test = 135.3  on 7 df,   p=<2e-16

Univariate modelling

## Univariate associations
# Predictors screened in the pooled (all ART groups) univariable table.
univ_variables <- c(
  "Age", "ART", "Sex (Male)", "CD4", "Previous TB",
  "Incident TB", "Current TB", "Previous Cryptococcosis",
  "Viral Load at Enumeration", "Enumerated in Hospital"
)

# Same list without ART; used below for the On_ART and Disengaged tables.
univ_variables_strat <- setdiff(univ_variables, "ART")

# Reduced list used below for the Naive table: additionally drops previous
# cryptococcosis and viral load at enumeration.
univ_variables_strat2 <- setdiff(
  univ_variables_strat,
  c("Previous Cryptococcosis", "Viral Load at Enumeration")
)

# Univariable log-binomial regressions (one model per predictor) of Censored
# on each variable in univ_variables, for all ART groups pooled. Estimates are
# exponentiated so the table reports risk ratios.
# all_of() makes it explicit that univ_variables is an external character
# vector of column names rather than a column of Model_Data.
UV_all <- Model_Data %>%
  dplyr::select(dplyr::all_of(univ_variables), Censored) %>% # model columns
  tbl_uvregression(
    method = glm,                               # regression function
    y = Censored,                               # outcome
    method.args = list(family = binomial(log)), # log link -> risk ratios
    exponentiate = TRUE
  ) %>%
  # Drop the p-value column so only the estimate and its CI are shown.
  modify_table_body(~ .x %>% dplyr::select(-p.value))

# Univariable log-binomial regressions of Censored on each variable in
# univ_variables_strat, restricted to the On_ART data. Estimates are
# exponentiated so the table reports risk ratios.
# all_of() makes it explicit that univ_variables_strat is an external
# character vector of column names rather than a column of On_ART.
UV_ART <- On_ART %>%
  dplyr::select(dplyr::all_of(univ_variables_strat), Censored) %>%
  tbl_uvregression(
    method = glm,
    y = Censored,
    method.args = list(family = binomial(log)), # log link -> risk ratios
    exponentiate = TRUE
  ) %>%
  # Drop the p-value column so only the estimate and its CI are shown.
  modify_table_body(~ .x %>% dplyr::select(-p.value))



# Univariable log-binomial regressions of Censored on each variable in
# univ_variables_strat, restricted to the Disengaged data. Estimates are
# exponentiated so the table reports risk ratios.
# all_of() makes it explicit that univ_variables_strat is an external
# character vector of column names rather than a column of Disengaged.
UV_Dis <- Disengaged %>%
  dplyr::select(dplyr::all_of(univ_variables_strat), Censored) %>%
  tbl_uvregression(
    method = glm,
    y = Censored,
    method.args = list(family = binomial(log)), # log link -> risk ratios
    exponentiate = TRUE
  ) %>%
  # Drop the p-value column so only the estimate and its CI are shown.
  modify_table_body(~ .x %>% dplyr::select(-p.value))


# Univariable log-binomial regressions of Censored on each variable in
# univ_variables_strat2, restricted to the Naive data. Estimates are
# exponentiated so the table reports risk ratios.
# all_of() makes it explicit that univ_variables_strat2 is an external
# character vector of column names rather than a column of Naive.
UV_Naive <- Naive %>%
  dplyr::select(dplyr::all_of(univ_variables_strat2), Censored) %>%
  tbl_uvregression(
    method = glm,
    y = Censored,
    method.args = list(family = binomial(log)), # log link -> risk ratios
    exponentiate = TRUE
  ) %>%
  # Drop the p-value column so only the estimate and its CI are shown.
  modify_table_body(~ .x %>% dplyr::select(-p.value))

CPH univariate

## Univariate associations using Cox models
library(survival)
library(dplyr)
library(gtsummary)

# Predictors for the pooled univariable Cox table.
univ_variables <- c(
  "Age",
  "ART",
  "Sex (Male)",
  "CD4",
  "Previous TB",
  "Incident TB",
  "Current TB",
  "Previous Cryptococcosis",
  "Viral Load at Enumeration",
  "Enumerated in Hospital"
)

# Predictors for the On_ART and Disengaged tables (no ART term).
univ_variables_strat <- c(
  "Age",
  "Sex (Male)",
  "CD4",
  "Previous TB",
  "Incident TB",
  "Current TB",
  "Previous Cryptococcosis",
  "Viral Load at Enumeration",
  "Enumerated in Hospital"
)

# Predictors for the Naive table (no ART, cryptococcosis or viral load terms).
univ_variables_strat2 <- c(
  "Age",
  "Sex (Male)",
  "CD4",
  "Previous TB",
  "Incident TB",
  "Current TB",
  "Enumerated in Hospital"
)

# Split the modelling data by ART status at enumeration.
# Model_Data must already carry correctly coded Time and Censored columns.
On_ART <- subset(Model_Data, ART == "On ART")
Disengaged <- subset(Model_Data, ART == "LTFU")
Naive <- subset(Model_Data, ART == "ART Naive")

# Survival outcomes (follow-up time + event indicator) for the full data set
# and for each ART stratum.
Surv_object_Model_Data <- with(Model_Data, Surv(time = Time, event = Censored))
Surv_object_On_ART <- with(On_ART, Surv(time = Time, event = Censored))
Surv_object_Disengaged <- with(Disengaged, Surv(time = Time, event = Censored))
Surv_object_Naive <- with(Naive, Surv(time = Time, event = Censored))

# Univariable Cox models, all ART groups pooled: one model per predictor in
# univ_variables, reported as hazard ratios. The p-value column is removed
# from the table body so only HR and CI are displayed.
UV_all <- Model_Data %>%
  dplyr::select(all_of(univ_variables), Time, Censored) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(Time, Censored),
    exponentiate = TRUE
  ) %>%
  modify_table_body(function(body) dplyr::select(body, -p.value))

# Univariable Cox models within the On_ART data: one model per predictor in
# univ_variables_strat, reported as hazard ratios. The p-value column is
# removed from the table body so only HR and CI are displayed.
UV_ART <- On_ART %>%
  dplyr::select(all_of(univ_variables_strat), Time, Censored) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(Time, Censored),
    exponentiate = TRUE
  ) %>%
  modify_table_body(function(body) dplyr::select(body, -p.value))

# Univariable Cox models within the Disengaged data: one model per predictor
# in univ_variables_strat, reported as hazard ratios. The p-value column is
# removed from the table body so only HR and CI are displayed.
UV_Dis <- Disengaged %>%
  dplyr::select(all_of(univ_variables_strat), Time, Censored) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(Time, Censored),
    exponentiate = TRUE
  ) %>%
  modify_table_body(function(body) dplyr::select(body, -p.value))

# Univariable Cox models within the Naive data: one model per predictor in
# univ_variables_strat2, reported as hazard ratios. The p-value column is
# removed from the table body so only HR and CI are displayed.
UV_Naive <- Naive %>%
  dplyr::select(all_of(univ_variables_strat2), Time, Censored) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(Time, Censored),
    exponentiate = TRUE
  ) %>%
  modify_table_body(function(body) dplyr::select(body, -p.value))

# Display tables
# Auto-print the pooled univariable Cox table (built from Model_Data).
UV_all
Characteristic N HR 95% CI
Age 11,152

    25-35
— —
    18-25
1.03 0.82, 1.29
    35-45
1.36 1.19, 1.55
    >45
1.94 1.68, 2.23
ART 11,152

    On ART
— —
    LTFU
0.84 0.74, 0.96
    ART Naive
0.49 0.43, 0.57
Sex (Male) 11,152 1.22 1.09, 1.36
CD4 11,152

    101 - 199
— —
    51 - 100
1.60 1.39, 1.84
    0 - 50
2.94 2.60, 3.33
Previous TB 11,152 2.00 1.80, 2.23
Incident TB 11,152 1.34 1.19, 1.52
Current TB 11,152 1.88 1.68, 2.10
Previous Cryptococcosis 11,152 1.40 0.87, 2.25
Viral Load at Enumeration 11,152

    < 100
— —
    100 -1000
0.97 0.70, 1.35
    > 1000
1.53 1.27, 1.85
    None
0.78 0.65, 0.93
Enumerated in Hospital 11,152 3.11 2.77, 3.49
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio
# Auto-print the univariable Cox table built from the On_ART data.
UV_ART
Characteristic N HR 95% CI
Age 1,957

    25-35
— —
    18-25
0.76 0.43, 1.35
    35-45
1.01 0.77, 1.31
    >45
1.36 1.04, 1.78
Sex (Male) 1,957 1.11 0.90, 1.37
CD4 1,957

    101 - 199
— —
    51 - 100
1.71 1.31, 2.22
    0 - 50
2.59 2.01, 3.33
Previous TB 1,957 1.56 1.24, 1.97
Incident TB 1,957 1.17 0.92, 1.50
Current TB 1,957 2.18 1.71, 2.79
Previous Cryptococcosis 1,957 0.96 0.49, 1.85
Viral Load at Enumeration 1,957

    < 100
— —
    100 -1000
1.00 0.62, 1.61
    > 1000
1.50 1.17, 1.91
    None
1.18 0.85, 1.65
Enumerated in Hospital 1,957 3.57 2.88, 4.42
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio
# Auto-print the univariable Cox table built from the Disengaged data.
UV_Dis
Characteristic N HR 95% CI
Age 4,467

    25-35
— —
    18-25
1.36 0.99, 1.86
    35-45
1.23 1.02, 1.49
    >45
1.82 1.47, 2.25
Sex (Male) 4,467 1.24 1.06, 1.46
CD4 4,467

    101 - 199
— —
    51 - 100
1.74 1.40, 2.17
    0 - 50
3.58 2.97, 4.31
Previous TB 4,467 1.75 1.49, 2.06
Incident TB 4,467 1.29 1.08, 1.54
Current TB 4,467 1.91 1.62, 2.25
Previous Cryptococcosis 4,467 1.36 0.68, 2.73
Viral Load at Enumeration 4,467

    < 100
— —
    100 -1000
1.08 0.63, 1.86
    > 1000
1.68 1.16, 2.41
    None
1.10 0.78, 1.55
Enumerated in Hospital 4,467 2.73 2.29, 3.25
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio
# Auto-print the univariable Cox table built from the Naive data.
UV_Naive
Characteristic N HR 95% CI
Age 4,728

    25-35
— —
    18-25
0.97 0.65, 1.45
    35-45
1.69 1.33, 2.16
    >45
2.41 1.85, 3.13
Sex (Male) 4,728 1.46 1.19, 1.78
CD4 4,728

    101 - 199
— —
    51 - 100
1.49 1.16, 1.92
    0 - 50
2.65 2.11, 3.33
Previous TB 4,728 1.73 1.28, 2.32
Incident TB 4,728 1.27 0.98, 1.64
Current TB 4,728 2.30 1.89, 2.80
Enumerated in Hospital 4,728 2.62 2.06, 3.34
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio

Combining all models into one single table

# Multivariable table for the surv_reg model (shown under the "Overall"
# spanner in the merged table): exponentiated estimates with bold variable
# labels. The p-value column is removed so only the estimate and CI remain.
# The original call ended in a stray trailing comma, which passes an empty
# argument through `...`; it is removed here.
MV_1 <- tbl_regression(
  surv_reg,
  exponentiate = TRUE,
  pvalue_fun = ~ style_pvalue(.x, digits = 2)
) %>%
  bold_labels() %>%
  modify_table_body(~ .x %>% dplyr::select(-p.value))


# Multivariable table for the surv_ART1 model (shown under the "On ART 1"
# spanner in the merged table): exponentiated estimates with bold variable
# labels. The p-value column is removed so only the estimate and CI remain.
# The original call ended in a stray trailing comma, which passes an empty
# argument through `...`; it is removed here.
MV_2 <- tbl_regression(
  surv_ART1,
  exponentiate = TRUE,
  pvalue_fun = ~ style_pvalue(.x, digits = 2)
) %>%
  bold_labels() %>%
  modify_table_body(~ .x %>% dplyr::select(-p.value))

# Multivariable table for the surv_ART2 model (shown under the "On ART 2"
# spanner in the merged table): exponentiated estimates with bold variable
# labels. The p-value column is removed so only the estimate and CI remain.
# The original call ended in a stray trailing comma, which passes an empty
# argument through `...`; it is removed here.
MV_3 <- tbl_regression(
  surv_ART2,
  exponentiate = TRUE,
  pvalue_fun = ~ style_pvalue(.x, digits = 2)
) %>%
  bold_labels() %>%
  modify_table_body(~ .x %>% dplyr::select(-p.value))

# Multivariable table for the surv_Dis model (shown under the "Disengaged"
# spanner in the merged table): exponentiated estimates with bold variable
# labels. The p-value column is removed so only the estimate and CI remain.
# The original call ended in a stray trailing comma, which passes an empty
# argument through `...`; it is removed here.
MV_4 <- tbl_regression(
  surv_Dis,
  exponentiate = TRUE,
  pvalue_fun = ~ style_pvalue(.x, digits = 2)
) %>%
  bold_labels() %>%
  modify_table_body(~ .x %>% dplyr::select(-p.value))

# Multivariable table for the surv_Naive model (shown under the "ART Naive"
# spanner in the merged table): exponentiated estimates with bold variable
# labels. The p-value column is removed so only the estimate and CI remain.
# The original call ended in a stray trailing comma, which passes an empty
# argument through `...`; it is removed here.
MV_5 <- tbl_regression(
  surv_Naive,
  exponentiate = TRUE,
  pvalue_fun = ~ style_pvalue(.x, digits = 2)
) %>%
  bold_labels() %>%
  modify_table_body(~ .x %>% dplyr::select(-p.value))
# Switch gtsummary to its compact theme, then place every univariable table
# next to the multivariable table(s) for the same population. Tables and
# spanner headers are listed in matching order, one population per row below.
theme_gtsummary_compact()
tbl_merge(
  tbls = list(
    UV_all, MV_1,         # all ART groups
    UV_ART, MV_2, MV_3,   # On ART (two multivariable specifications)
    UV_Dis, MV_4,         # disengaged
    UV_Naive, MV_5        # ART naive
  ),
  tab_spanner = c(
    "**Univariable**", "**Overall**",
    "**Univariable**", "**On ART 1**", "**On ART 2**",
    "**Univariable**", "**Disengaged**",
    "**Univariable**", "**ART Naive**"
  )
)
Characteristic
Univariable
Overall
Univariable
On ART 1
On ART 2
Univariable
Disengaged
Univariable
ART Naive
N HR 95% CI HR 95% CI N HR 95% CI HR 95% CI HR 95% CI N HR 95% CI HR 95% CI N HR 95% CI HR 95% CI
Age 11,152



1,957





4,467



4,728



    25-35
— — — —
— — — — — —
— — — —
— — — —
    18-25
1.03 0.82, 1.29 1.19 0.95, 1.50
0.76 0.43, 1.35 0.75 0.42, 1.34 0.78 0.44, 1.38
1.36 0.99, 1.86 1.54 1.12, 2.12
0.97 0.65, 1.45 1.16 0.77, 1.74
    35-45
1.36 1.19, 1.55 1.20 1.05, 1.37
1.01 0.77, 1.31 1.00 0.77, 1.31 1.00 0.76, 1.30
1.23 1.02, 1.49 1.10 0.90, 1.34
1.69 1.33, 2.16 1.57 1.23, 2.01
    >45
1.94 1.68, 2.23 1.75 1.52, 2.03
1.36 1.04, 1.78 1.48 1.12, 1.96 1.41 1.07, 1.86
1.82 1.47, 2.25 1.68 1.34, 2.11
2.41 1.85, 3.13 2.25 1.73, 2.94
ART 11,152




















    On ART
— — — —
















    LTFU
0.84 0.74, 0.96 0.82 0.72, 0.94
















    ART Naive
0.49 0.43, 0.57 0.60 0.51, 0.70
















Sex (Male) 11,152 1.22 1.09, 1.36 1.04 0.93, 1.16 1,957 1.11 0.90, 1.37 0.95 0.76, 1.19 1.02 0.82, 1.27 4,467 1.24 1.06, 1.46 0.97 0.82, 1.16 4,728 1.46 1.19, 1.78 1.26 1.03, 1.54
CD4 11,152



1,957





4,467



4,728



    101 - 199
— — — —
— — — —


— — — —
— — — —
    51 - 100
1.60 1.39, 1.84 1.62 1.40, 1.86
1.71 1.31, 2.22 1.70 1.30, 2.21


1.74 1.40, 2.17 1.71 1.38, 2.13
1.49 1.16, 1.92 1.46 1.13, 1.88
    0 - 50
2.94 2.60, 3.33 2.90 2.56, 3.29
2.59 2.01, 3.33 2.59 2.01, 3.35


3.58 2.97, 4.31 3.41 2.82, 4.11
2.65 2.11, 3.33 2.45 1.95, 3.09
Previous TB 11,152 2.00 1.80, 2.23 1.47 1.30, 1.67 1,957 1.56 1.24, 1.97 1.45 1.14, 1.83 1.50 1.19, 1.90 4,467 1.75 1.49, 2.06 1.50 1.27, 1.77 4,728 1.73 1.28, 2.32 1.54 1.14, 2.07
Incident TB 11,152 1.34 1.19, 1.52

1,957 1.17 0.92, 1.50



4,467 1.29 1.08, 1.54

4,728 1.27 0.98, 1.64

Current TB 11,152 1.88 1.68, 2.10

1,957 2.18 1.71, 2.79



4,467 1.91 1.62, 2.25

4,728 2.30 1.89, 2.80

Previous Cryptococcosis 11,152 1.40 0.87, 2.25 0.88 0.55, 1.43 1,957 0.96 0.49, 1.85 0.82 0.42, 1.59 0.87 0.45, 1.68 4,467 1.36 0.68, 2.73 1.02 0.51, 2.06




Viral Load at Enumeration 11,152



1,957





4,467








    < 100
— —


— —

— —
— —






    100 -1000
0.97 0.70, 1.35


1.00 0.62, 1.61

1.04 0.64, 1.68
1.08 0.63, 1.86






    > 1000
1.53 1.27, 1.85


1.50 1.17, 1.91

1.51 1.18, 1.92
1.68 1.16, 2.41






    None
0.78 0.65, 0.93


1.18 0.85, 1.65

1.25 0.89, 1.76
1.10 0.78, 1.55






Enumerated in Hospital 11,152 3.11 2.77, 3.49

1,957 3.57 2.88, 4.42



4,467 2.73 2.29, 3.25

4,728 2.62 2.06, 3.34

Abbreviations: CI = Confidence Interval, HR = Hazard Ratio

Diagnostics and alternative considerations for overall CPH model

Diagnostics

##                            chisq df       p
## ART                       15.393  2 0.00045
## `Sex (Male)`               0.903  1 0.34186
## Age                       23.245  3 3.6e-05
## CD4                       14.387  2 0.00075
## `Previous TB`              1.627  1 0.20212
## `Previous Cryptococcosis`  0.288  1 0.59180
## GLOBAL                    58.727 10 6.3e-09
## `geom_smooth()` using formula = 'y ~ x'

Age

KM Curves and log-log curves

## Warning: Removed 285 rows containing missing values or values outside the scale range
## (`geom_step()`).
## Warning: Removed 269 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
## Warning: Removed 285 rows containing missing values or values outside the scale range
## (`geom_step()`).
## Warning: Removed 269 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).

Schoenfeld residuals

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

Summary of Age:

The test of the PH assumption for age is clearly statistically significant because the curves for the 18-25 and 25-35 age groups cross. However, the two curves are so nearly collinear that their hazard curves look almost the same, in which case the crossing may not be a major issue.

CD4 count

KM Curves and log-log curves

## Warning: Removed 550 rows containing missing values or values outside the scale range
## (`geom_step()`).
## Warning: Removed 519 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
## Warning: Removed 550 rows containing missing values or values outside the scale range
## (`geom_step()`).
## Warning: Removed 519 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).

Schoenfeld residuals

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

Summary of CD4

CD4 doesn’t appear to violate the PH assumption significantly. The test is likely statistically significant because of departures at the very beginning and end of follow-up.

ART

KM Curves and log-log curves

## Warning: Removed 288 rows containing missing values or values outside the scale range
## (`geom_step()`).
## Warning: Removed 274 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
## Warning: Removed 288 rows containing missing values or values outside the scale range
## (`geom_step()`).
## Warning: Removed 274 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).

Schoenfeld residuals

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

Summary of ART

ART violates the PH assumption mostly in the early time period. This is best seen in the KM curves, where the On ART group has an acute mortality risk that is much steeper than that of the other two groups. Various modelling strategies to overcome the violation of the PH assumption are subsequently evaluated. However, after evaluating all strategies and reviewing them with all authors, it was concluded that the purpose of modelling in this study is ultimately not to obtain accurate estimates of mortality risk: the three ART groups reflect three different populations, each confounded by important selection biases, so attempting to estimate accurate hazard/risk ratios over time would imply a false level of precision. A decision was therefore made to stick with CPH modelling while clearly understanding its limitations in this context.

Managing violations of the PH assumption

Four approaches to managing the violation of the PH assumption within ART were considered: (1) stratifying by ART category (implemented in the final modelling); (2) estimating a fully parametric model; (3) employing step functions; and (4) using time transformations. The most successful approaches were stratification and the use of an stpm2 model with 3 knots, 4 splines and a PH link function.

However as discussed above it was ultimately decided to go with a CPH model as 1) the violation of the PH assumption is limited to the first time period for reasons discussed in the discussion and 2) the purpose of modelling in this study is hypothesis generating rather than attempting to accurately estimate risk magnitudes.

Fitting stpm2 model

library(rstpm2)
## Loading required package: splines
## 
## Attaching package: 'rstpm2'
## The following object is masked from 'package:survival':
## 
##     colon
# Flexible parametric survival model (stpm2, df = 4) with ART status as the
# only covariate, followed by the first three rows of its exponentiated
# coefficient table.
fit <- stpm2(
  Surv(Time, Censored) ~ ART,
  data = Model_Data,
  df = 4
)
eform(fit)[1:3, ]
##                 exp(beta)        2.5 %       97.5 %
## (Intercept)  0.0008561144 0.0008112656 0.0009024998
## ARTLTFU      0.8346362570 0.7700323597 0.9027361177
## ARTART Naive 0.4877151354 0.4411838490 0.5373987454
# Fit Cox proportional hazards model with ART status as the only covariate
cox.fit <- coxph(Surv(Time, Censored) ~ ART, data = Model_Data)

# Survival curve implied by the Cox fit.
# NOTE(review): survfit() is called without `newdata`; per the survival
# package documentation the curve is then evaluated at the fit's stored
# covariate means rather than at the reference ART level -- confirm this is
# the curve intended as "baseline".
surv_curve <- survfit(cox.fit)

# Hazard rate between consecutive event times, taken as the slope of the
# cumulative hazard. The previous -diff(S)/diff(t) is the event density
# f(t) = h(t) * S(t), which understates the hazard once S(t) falls below 1.
baseline_hazard <- diff(surv_curve$cumhaz) / diff(surv_curve$time)

# Plot the baseline hazard function (one value per interval, drawn at the
# interval's right end point)
plot(surv_curve$time[-1], baseline_hazard, type = 'l',
     xlab = 'Time', ylab = 'Baseline Hazard',
     main = 'Baseline Hazard Function of Cox Model',
     ylim = c(0, 0.005))

# Overlay the stpm2 model curves (black) for the three ART groups on the
# Kaplan-Meier curves (red). Line type identifies the group in both sets:
# solid = On ART, dashed = LTFU/disengaged, dotted = ART naive.
# The figure title previously read "Univaraite"; the typo is corrected.
plot(fit, newdata = data.frame(ART = "On ART"),
     xlab = "Time since enumeration (days)",
     main = "Univariate flexible PH parametric model of ART plotted against KM")
lines(fit, newdata = data.frame(ART = "LTFU"), lty = 2)
lines(fit, newdata = data.frame(ART = "ART Naive"), lty = 3)
lines(survfit(Surv(Time, Censored) ~ ART, data = Model_Data),
      col = "red", lty = 1:3)
# Six legend entries: line types are repeated explicitly for the model and
# KM sets instead of relying on silent recycling of 1:3.
legend("topright",
       c("PH On ART", "PH Disengaged", "PH ART Naive",
         "KM On ART", "KM Disengaged", "KM ART Naive"),
       lty = rep(1:3, times = 2),
       col = rep(c("black", "red"), each = 3))

Plotting hazard function

# Predicted hazard (with standard errors) from the stpm2 fit for each ART
# group over the prediction time grid; full = TRUE returns the covariates
# alongside the estimates.
predART <- predict(fit, newdata = data.frame(ART = c("On ART", "LTFU", "ART Naive")),
                   type = "hazard", grid = TRUE, full = TRUE, se.fit = TRUE)
# Relabel the groups for display. `levels` must be given explicitly: without
# it factor() sorts the character values alphabetically ("ART Naive", "LTFU",
# "On ART"), so positional labels would tag the ART-naive curve "On ART" and
# the On ART curve "Naive".
predART <- transform(
  predART,
  ART = factor(ART,
               levels = c("On ART", "LTFU", "ART Naive"),
               labels = c("On ART", "Disengaged", "Naive"))
)
# Plot hazard over time with a confidence ribbon per ART group
ggplot(predART, aes(x = Time, y = Estimate, ymin = lower, ymax = upper, fill = ART)) +
  xlab("Time since enumeration") +
  ylab("Hazard") +
  geom_ribbon() +
  geom_line()

Additional stats and calculations

# Evaluating re-initiating ART
# NOTE(review): the object is called `Disengaged` and this section is about
# re-initiation, yet the filter keeps ART_Exp_C == "On ART". Every table built
# from the delayed-re-initiation subsets below prints "table of extent 0",
# whereas the parallel ART-naive section yields counts. This suggests the
# filter value may not be the intended category -- confirm against the coding
# of ART_Exp_C in Cohort.
# NOTE(review): this also overwrites any earlier `Disengaged` object in the
# session (e.g. the Model_Data subset used for modelling).
Disengaged <- subset(Cohort, ART_Exp_C == "On ART")
table(Disengaged$Censored)
## 
##    0    1 
## 2033  346
summary(Disengaged$Enumeration_CD4)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    74.0   126.0   117.2   166.0   199.0
# Rows flagged as re-initiated, those re-initiated at time 0, and those with
# Time_Reinitiation below 15 (presumably days -- confirm units).
Disengaged_Time <- subset(Disengaged, Reinitiated == 1)
Disengaged_Time_0 <- subset(Disengaged_Time, Time_Reinitiation == 0)
Disengaged_14 <- subset(Disengaged, Time_Reinitiation < 15)
# Re-initiated in Hospital
table(Disengaged$Hospital_Enumerations)
## 
##    0    1 
## 1813  566
# Hospital enumerations that were linked.
Hospital_Disengaged <- subset(Disengaged, Hospital_Enumerations == 1)
Hospital_Disengaged <- subset(Hospital_Disengaged, Linked == 1)
# Re-initiation with Time_Reinitiation below 61 vs. later/never
# (Time_Reinitiation > 60 or Reinitiated == 0).
Disengaged_H_60 <- subset(Hospital_Disengaged, Time_Reinitiation < 61)
Disengaged_H_D_60 <- subset(Hospital_Disengaged, Time_Reinitiation > 60 | Reinitiated == 0)
# Later/never group split by follow-up time: Time < 61 vs. Time > 60.
Disengaged_H_D_60_Less <- subset(Disengaged_H_D_60, Time < 61)
Disengaged_C <- subset(Disengaged_H_D_60, Time > 60)
table(Disengaged_C$Prevalent_Crypto)
## < table of extent 0 >
table(Disengaged_C$Current_TB_PHDC)
## < table of extent 0 >
# Keep only rows with neither prevalent cryptococcosis nor current TB.
Disengaged_C <- subset(Disengaged_C, Prevalent_Crypto == 0 & Current_TB_PHDC == 0)
table(Disengaged_C$Reinitiated)
## < table of extent 0 >
#Re-initiated in PHC
table(Disengaged$Hospital_Enumerations)
## 
##    0    1 
## 1813  566
# Same sequence as the hospital branch, for rows with
# Hospital_Enumerations == 0.
PHC_Disengaged <- subset(Disengaged, Hospital_Enumerations == 0)
PHC_Disengaged <- subset(PHC_Disengaged, Linked == 1)
Disengaged_P_60 <- subset(PHC_Disengaged, Time_Reinitiation < 61)
Disengaged_P_D_60 <- subset(PHC_Disengaged, Time_Reinitiation > 60 | Reinitiated == 0)
Disengaged_P_D_60_Less <- subset(Disengaged_P_D_60, Time < 61)
Disengaged_P_C <- subset(Disengaged_P_D_60, Time > 60)
table(Disengaged_P_C$Prevalent_Crypto)
## < table of extent 0 >
table(Disengaged_P_C$Current_TB_PHDC)
## < table of extent 0 >
Disengaged_P_C <- subset(Disengaged_P_C, Prevalent_Crypto == 0 & Current_TB_PHDC == 0)
table(Disengaged_P_C$Reinitiated)
## < table of extent 0 >
# Evaluating first ART start among people who were ART-naive at enumeration
# NOTE(review): this reassigns `Naive`, replacing any earlier object of that
# name in the session (e.g. the Model_Data subset used for modelling).
Naive <- subset(Cohort, ART_Exp_C == "Naive")
table(Naive$Censored)
## 
##    0    1 
## 5290  395
summary(Naive$Enumeration_CD4)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    56.0   108.0   105.9   157.0   199.0
# Rows that subsequently initiated ART, those initiating at time 0, and those
# with Time_To_First_ART below 15 (presumably days -- confirm units).
Naive_Time <- subset(Naive, Subs_Initiated == 1)
Naive_Time_0 <- subset(Naive, Subs_Initiated == 1 & Time_To_First_ART == 0)
Naive_14 <- subset(Naive, Time_To_First_ART < 15)
# First ART in hospital
table(Naive$Hospital_Enumerations)
## 
##    0    1 
## 5043  642
# Linked hospital enumerations.
Hospital_Naive <- subset(Naive, Hospital_Enumerations == 1 & Linked == 1)
# First ART with Time_To_First_ART below 61 vs. later/never.
Naive_H_60 <- subset(Hospital_Naive, Time_To_First_ART < 61)
Naive_H_D_60 <- subset(
  Hospital_Naive,
  Time_To_First_ART > 60 | Subs_Initiated == 0
)
# Later/never group split by follow-up time: Time > 60 vs. Time < 61.
Naive_C <- subset(Naive_H_D_60, Time > 60)
Naive_H_D_60_Less <- subset(Naive_H_D_60, Time < 61)
table(Naive_C$Prevalent_Crypto)
## 
##   0   1 
## 112   6
table(Naive_C$Current_TB_PHDC)
## 
##  0  1 
## 66 52
# Keep only rows with neither prevalent cryptococcosis nor current TB.
Naive_C <- subset(Naive_C, Prevalent_Crypto == 0 & Current_TB_PHDC == 0)
table(Naive_C$Subs_Initiated)
## 
##  0  1 
## 18 44
# First ART in PHC
table(Naive$Hospital_Enumerations)
## 
##    0    1 
## 5043  642
# Same sequence for rows with Hospital_Enumerations == 0.
PHC_Naive <- subset(Naive, Hospital_Enumerations == 0 & Linked == 1)
Naive_P_60 <- subset(PHC_Naive, Time_To_First_ART < 61)
Naive_P_D_60 <- subset(
  PHC_Naive,
  Time_To_First_ART > 60 | Subs_Initiated == 0
)
Naive_P_C <- subset(Naive_P_D_60, Time > 60)
Naive_P_D_60_Less <- subset(Naive_P_D_60, Time < 61)
table(Naive_P_C$Prevalent_Crypto)
## 
##   0   1 
## 634  11
table(Naive_P_C$Current_TB_PHDC)
## 
##   0   1 
## 477 168
Naive_P_C <- subset(Naive_P_C, Prevalent_Crypto == 0 & Current_TB_PHDC == 0)
table(Naive_P_C$Subs_Initiated)
## 
##   0   1 
##  74 397
# Cross-tabulate later ART (re-)initiation (rows) against Censored (columns)
# in the delayed-initiation subsets built above.
# The two Disengaged_* subsets are empty here, hence "extent 0 x 0".
table(Disengaged_C$Reinitiated, Disengaged_C$Censored)
## < table of extent 0 x 0 >
table(Disengaged_P_C$Reinitiated, Disengaged_P_C$Censored)
## < table of extent 0 x 0 >
# ART-naive, hospital enumerations
table(Naive_C$Subs_Initiated, Naive_C$Censored)
##    
##      0  1
##   0 16  2
##   1 40  4
# ART-naive, enumerations with Hospital_Enumerations == 0
table(Naive_P_C$Subs_Initiated, Naive_P_C$Censored)
##    
##       0   1
##   0  68   6
##   1 370  27
# One year mortality
# Restrict to linked rows enumerated on or before 2020-03-30.
Cohort_Linked <- subset(Cohort, Linked == 1)
Cohort_Year <- subset(Cohort_Linked, Enumeration_date <= "2020-03-30")
# Rows with the event recorded after more than 365 units of follow-up time.
Cohort_Died_More <- subset(Cohort_Year, Censored == 1 & Time > 365)
Cohort_Died_More$More_than_Year <- 1
# Flag those rows in Cohort_Year by study_id (1 = late event, 0 otherwise),
# then remove them.
# NOTE(review): late events are dropped from the table entirely rather than
# being counted in the Censored == 0 column -- confirm this is intended.
Cohort_Year$More_than_Year <-
  as.numeric(Cohort_Year$study_id %in% Cohort_Died_More$study_id)
Cohort_Year <- subset(Cohort_Year, More_than_Year < 1)
table(Cohort_Year$Censored)
## 
##    0    1 
## 8323  646
# Rows of Linked_Data whose Mortality is "Died": index plot of their Time
# values, then a density plot of Time by ART status with group medians marked.
Cohort_Censored <- subset(Linked_Data, Mortality == "Died")
plot(Cohort_Censored$Time)

Cen_Plot <- ggdensity(
  Cohort_Censored,
  x = "Time",
  add = "median",
  color = "ART",
  fill = "ART",
  palette = "npg",
  title = "Density plot of time to death by enumeration ART status"
)
Cen_Plot

# Rows with (re-)initiation time below 61 in each group.
Disengaged_60 <- subset(Disengaged, Time_Reinitiation < 61)
Naive_60 <- subset(Naive, Time_To_First_ART < 61)